aboutsummaryrefslogtreecommitdiff
path: root/arch/loongarch
diff options
context:
space:
mode:
authorMaxime Ripard2023-05-09 15:03:40 +0200
committerMaxime Ripard2023-05-09 15:03:40 +0200
commitff32fcca64437f679a2bf1c0a19d5def389a18e2 (patch)
tree122863d5d6159b30fd6834cbe599f8ce1b9e8144 /arch/loongarch
parent79c87edd18ec49f5b6fb40175bd1b1fea9398fdb (diff)
parentac9a78681b921877518763ba0e89202254349d1b (diff)
Merge drm/drm-next into drm-misc-next
Start the 6.5 release cycle. Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Diffstat (limited to 'arch/loongarch')
-rw-r--r--arch/loongarch/Kconfig36
-rw-r--r--arch/loongarch/Makefile2
-rw-r--r--arch/loongarch/configs/loongson3_defconfig1
-rw-r--r--arch/loongarch/crypto/Kconfig14
-rw-r--r--arch/loongarch/crypto/Makefile6
-rw-r--r--arch/loongarch/crypto/crc32-loongarch.c304
-rw-r--r--arch/loongarch/include/asm/acpi.h3
-rw-r--r--arch/loongarch/include/asm/addrspace.h4
-rw-r--r--arch/loongarch/include/asm/bootinfo.h1
-rw-r--r--arch/loongarch/include/asm/checksum.h66
-rw-r--r--arch/loongarch/include/asm/cmpxchg.h4
-rw-r--r--arch/loongarch/include/asm/cpu-features.h1
-rw-r--r--arch/loongarch/include/asm/cpu.h40
-rw-r--r--arch/loongarch/include/asm/fpu.h3
-rw-r--r--arch/loongarch/include/asm/ftrace.h37
-rw-r--r--arch/loongarch/include/asm/inst.h26
-rw-r--r--arch/loongarch/include/asm/io.h4
-rw-r--r--arch/loongarch/include/asm/local.h13
-rw-r--r--arch/loongarch/include/asm/loongarch.h63
-rw-r--r--arch/loongarch/include/asm/module.lds.h8
-rw-r--r--arch/loongarch/include/asm/ptrace.h5
-rw-r--r--arch/loongarch/include/asm/smp.h2
-rw-r--r--arch/loongarch/include/uapi/asm/ptrace.h3
-rw-r--r--arch/loongarch/kernel/Makefile2
-rw-r--r--arch/loongarch/kernel/cpu-probe.c9
-rw-r--r--arch/loongarch/kernel/ftrace_dyn.c128
-rw-r--r--arch/loongarch/kernel/genex.S1
-rw-r--r--arch/loongarch/kernel/irq.c2
-rw-r--r--arch/loongarch/kernel/kfpu.c43
-rw-r--r--arch/loongarch/kernel/mcount_dyn.S13
-rw-r--r--arch/loongarch/kernel/perf_event.c2
-rw-r--r--arch/loongarch/kernel/proc.c1
-rw-r--r--arch/loongarch/kernel/process.c2
-rw-r--r--arch/loongarch/kernel/ptrace.c25
-rw-r--r--arch/loongarch/kernel/setup.c25
-rw-r--r--arch/loongarch/kernel/smp.c6
-rw-r--r--arch/loongarch/kernel/stacktrace.c2
-rw-r--r--arch/loongarch/kernel/time.c2
-rw-r--r--arch/loongarch/kernel/traps.c318
-rw-r--r--arch/loongarch/kernel/unwind.c1
-rw-r--r--arch/loongarch/kernel/unwind_prologue.c4
-rw-r--r--arch/loongarch/lib/Makefile4
-rw-r--r--arch/loongarch/lib/clear_user.S136
-rw-r--r--arch/loongarch/lib/copy_user.S251
-rw-r--r--arch/loongarch/lib/csum.c141
-rw-r--r--arch/loongarch/lib/error-inject.c10
-rw-r--r--arch/loongarch/lib/memcpy.S147
-rw-r--r--arch/loongarch/lib/memmove.S120
-rw-r--r--arch/loongarch/lib/memset.S116
-rw-r--r--arch/loongarch/mm/init.c4
-rw-r--r--arch/loongarch/net/bpf_jit.c10
-rw-r--r--arch/loongarch/power/suspend_asm.S4
-rw-r--r--arch/loongarch/vdso/Makefile4
53 files changed, 1813 insertions, 366 deletions
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 7fd51257e0ed..d38b066fc931 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -10,6 +10,7 @@ config LOONGARCH
select ARCH_ENABLE_MEMORY_HOTPLUG
select ARCH_ENABLE_MEMORY_HOTREMOVE
select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
+ select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
@@ -53,8 +54,8 @@ config LOONGARCH
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_USE_QUEUED_SPINLOCKS
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
- select ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
select ARCH_WANT_LD_ORPHAN_WARN
+ select ARCH_WANT_OPTIMIZE_VMEMMAP
select ARCH_WANTS_NO_INSTR
select BUILDTIME_TABLE_SORT
select COMMON_CLK
@@ -80,6 +81,7 @@ config LOONGARCH
select GENERIC_SMP_IDLE_THREAD
select GENERIC_TIME_VSYSCALL
select GPIOLIB
+ select HAS_IOPORT
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_MMAP_RND_BITS if MMU
select HAVE_ARCH_SECCOMP_FILTER
@@ -92,6 +94,7 @@ config LOONGARCH
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_ARGS
+ select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
select HAVE_DYNAMIC_FTRACE_WITH_REGS
select HAVE_EBPF_JIT
select HAVE_EFFICIENT_UNALIGNED_ACCESS if !ARCH_STRICT_ALIGN
@@ -99,6 +102,7 @@ config LOONGARCH
select HAVE_FAST_GUP
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_ARG_ACCESS_API
+ select HAVE_FUNCTION_ERROR_INJECTION
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_TRACER
select HAVE_GENERIC_VDSO
@@ -117,6 +121,8 @@ config LOONGARCH
select HAVE_PERF_USER_STACK_DUMP
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RSEQ
+ select HAVE_SAMPLE_FTRACE_DIRECT
+ select HAVE_SAMPLE_FTRACE_DIRECT_MULTI
select HAVE_SETUP_PER_CPU_AREA if NUMA
select HAVE_STACKPROTECTOR
select HAVE_SYSCALL_TRACEPOINTS
@@ -420,12 +426,9 @@ config NODES_SHIFT
config ARCH_FORCE_MAX_ORDER
int "Maximum zone order"
- range 14 64 if PAGE_SIZE_64KB
- default "14" if PAGE_SIZE_64KB
- range 12 64 if PAGE_SIZE_16KB
- default "12" if PAGE_SIZE_16KB
- range 11 64
- default "11"
+ default "13" if PAGE_SIZE_64KB
+ default "11" if PAGE_SIZE_16KB
+ default "10"
help
The kernel memory allocator divides physically contiguous memory
blocks into "zones", where each zone is a power of two number of
@@ -434,9 +437,6 @@ config ARCH_FORCE_MAX_ORDER
blocks of physically contiguous memory, then you may need to
increase this value.
- This config option is actually maximum order plus one. For example,
- a value of 11 means that the largest free memory block is 2^10 pages.
-
The page size is not necessarily 4KB. Keep this in mind
when choosing a value for this option.
@@ -447,6 +447,22 @@ config ARCH_IOREMAP
protection support. However, you can enable LoongArch DMW-based
ioremap() for better performance.
+config ARCH_WRITECOMBINE
+ bool "Enable WriteCombine (WUC) for ioremap()"
+ help
+ LoongArch maintains cache coherency in hardware, but when paired
+ with LS7A chipsets the WUC attribute (Weak-ordered UnCached, which
+ is similar to WriteCombine) is out of the scope of cache coherency
+ machanism for PCIe devices (this is a PCIe protocol violation, which
+ may be fixed in newer chipsets).
+
+ This means WUC can only used for write-only memory regions now, so
+ this option is disabled by default, making WUC silently fallback to
+ SUC for ioremap(). You can enable this option if the kernel is ensured
+ to run on hardware without this bug.
+
+ You can override this setting via writecombine=on/off boot parameter.
+
config ARCH_STRICT_ALIGN
bool "Enable -mstrict-align to prevent unaligned accesses" if EXPERT
default y
diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
index f71edf574101..a27e264bdaa5 100644
--- a/arch/loongarch/Makefile
+++ b/arch/loongarch/Makefile
@@ -115,6 +115,8 @@ endif
libs-y += arch/loongarch/lib/
libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a
+drivers-y += arch/loongarch/crypto/
+
# suspend and hibernation support
drivers-$(CONFIG_PM) += arch/loongarch/power/
diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig
index e18213f01cc4..6cd26dd3c134 100644
--- a/arch/loongarch/configs/loongson3_defconfig
+++ b/arch/loongarch/configs/loongson3_defconfig
@@ -487,7 +487,6 @@ CONFIG_CHELSIO_T4=m
CONFIG_E1000=y
CONFIG_E1000E=y
CONFIG_IGB=y
-CONFIG_IXGB=y
CONFIG_IXGBE=y
# CONFIG_NET_VENDOR_MARVELL is not set
# CONFIG_NET_VENDOR_MELLANOX is not set
diff --git a/arch/loongarch/crypto/Kconfig b/arch/loongarch/crypto/Kconfig
new file mode 100644
index 000000000000..200a6e8b43b1
--- /dev/null
+++ b/arch/loongarch/crypto/Kconfig
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0
+
+menu "Accelerated Cryptographic Algorithms for CPU (loongarch)"
+
+config CRYPTO_CRC32_LOONGARCH
+ tristate "CRC32c and CRC32"
+ select CRC32
+ select CRYPTO_HASH
+ help
+ CRC32c and CRC32 CRC algorithms
+
+ Architecture: LoongArch with CRC32 instructions
+
+endmenu
diff --git a/arch/loongarch/crypto/Makefile b/arch/loongarch/crypto/Makefile
new file mode 100644
index 000000000000..d22613d27ce9
--- /dev/null
+++ b/arch/loongarch/crypto/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for LoongArch crypto files..
+#
+
+obj-$(CONFIG_CRYPTO_CRC32_LOONGARCH) += crc32-loongarch.o
diff --git a/arch/loongarch/crypto/crc32-loongarch.c b/arch/loongarch/crypto/crc32-loongarch.c
new file mode 100644
index 000000000000..1f2a2c3839bc
--- /dev/null
+++ b/arch/loongarch/crypto/crc32-loongarch.c
@@ -0,0 +1,304 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * crc32.c - CRC32 and CRC32C using LoongArch crc* instructions
+ *
+ * Module based on mips/crypto/crc32-mips.c
+ *
+ * Copyright (C) 2014 Linaro Ltd <yazen.ghannam@linaro.org>
+ * Copyright (C) 2018 MIPS Tech, LLC
+ * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
+ */
+
+#include <linux/module.h>
+#include <crypto/internal/hash.h>
+
+#include <asm/cpu-features.h>
+#include <asm/unaligned.h>
+
+#define _CRC32(crc, value, size, type) \
+do { \
+ __asm__ __volatile__( \
+ #type ".w." #size ".w" " %0, %1, %0\n\t"\
+ : "+r" (crc) \
+ : "r" (value) \
+ : "memory"); \
+} while (0)
+
+#define CRC32(crc, value, size) _CRC32(crc, value, size, crc)
+#define CRC32C(crc, value, size) _CRC32(crc, value, size, crcc)
+
+static u32 crc32_loongarch_hw(u32 crc_, const u8 *p, unsigned int len)
+{
+ u32 crc = crc_;
+
+ while (len >= sizeof(u64)) {
+ u64 value = get_unaligned_le64(p);
+
+ CRC32(crc, value, d);
+ p += sizeof(u64);
+ len -= sizeof(u64);
+ }
+
+ if (len & sizeof(u32)) {
+ u32 value = get_unaligned_le32(p);
+
+ CRC32(crc, value, w);
+ p += sizeof(u32);
+ len -= sizeof(u32);
+ }
+
+ if (len & sizeof(u16)) {
+ u16 value = get_unaligned_le16(p);
+
+ CRC32(crc, value, h);
+ p += sizeof(u16);
+ }
+
+ if (len & sizeof(u8)) {
+ u8 value = *p++;
+
+ CRC32(crc, value, b);
+ }
+
+ return crc;
+}
+
+static u32 crc32c_loongarch_hw(u32 crc_, const u8 *p, unsigned int len)
+{
+ u32 crc = crc_;
+
+ while (len >= sizeof(u64)) {
+ u64 value = get_unaligned_le64(p);
+
+ CRC32C(crc, value, d);
+ p += sizeof(u64);
+ len -= sizeof(u64);
+ }
+
+ if (len & sizeof(u32)) {
+ u32 value = get_unaligned_le32(p);
+
+ CRC32C(crc, value, w);
+ p += sizeof(u32);
+ len -= sizeof(u32);
+ }
+
+ if (len & sizeof(u16)) {
+ u16 value = get_unaligned_le16(p);
+
+ CRC32C(crc, value, h);
+ p += sizeof(u16);
+ }
+
+ if (len & sizeof(u8)) {
+ u8 value = *p++;
+
+ CRC32C(crc, value, b);
+ }
+
+ return crc;
+}
+
+#define CHKSUM_BLOCK_SIZE 1
+#define CHKSUM_DIGEST_SIZE 4
+
+struct chksum_ctx {
+ u32 key;
+};
+
+struct chksum_desc_ctx {
+ u32 crc;
+};
+
+static int chksum_init(struct shash_desc *desc)
+{
+ struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
+ struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+ ctx->crc = mctx->key;
+
+ return 0;
+}
+
+/*
+ * Setting the seed allows arbitrary accumulators and flexible XOR policy
+ * If your algorithm starts with ~0, then XOR with ~0 before you set the seed.
+ */
+static int chksum_setkey(struct crypto_shash *tfm, const u8 *key, unsigned int keylen)
+{
+ struct chksum_ctx *mctx = crypto_shash_ctx(tfm);
+
+ if (keylen != sizeof(mctx->key))
+ return -EINVAL;
+
+ mctx->key = get_unaligned_le32(key);
+
+ return 0;
+}
+
+static int chksum_update(struct shash_desc *desc, const u8 *data, unsigned int length)
+{
+ struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+ ctx->crc = crc32_loongarch_hw(ctx->crc, data, length);
+ return 0;
+}
+
+static int chksumc_update(struct shash_desc *desc, const u8 *data, unsigned int length)
+{
+ struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+ ctx->crc = crc32c_loongarch_hw(ctx->crc, data, length);
+ return 0;
+}
+
+static int chksum_final(struct shash_desc *desc, u8 *out)
+{
+ struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+ put_unaligned_le32(ctx->crc, out);
+ return 0;
+}
+
+static int chksumc_final(struct shash_desc *desc, u8 *out)
+{
+ struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+ put_unaligned_le32(~ctx->crc, out);
+ return 0;
+}
+
+static int __chksum_finup(u32 crc, const u8 *data, unsigned int len, u8 *out)
+{
+ put_unaligned_le32(crc32_loongarch_hw(crc, data, len), out);
+ return 0;
+}
+
+static int __chksumc_finup(u32 crc, const u8 *data, unsigned int len, u8 *out)
+{
+ put_unaligned_le32(~crc32c_loongarch_hw(crc, data, len), out);
+ return 0;
+}
+
+static int chksum_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out)
+{
+ struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+ return __chksum_finup(ctx->crc, data, len, out);
+}
+
+static int chksumc_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out)
+{
+ struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+ return __chksumc_finup(ctx->crc, data, len, out);
+}
+
+static int chksum_digest(struct shash_desc *desc, const u8 *data, unsigned int length, u8 *out)
+{
+ struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
+
+ return __chksum_finup(mctx->key, data, length, out);
+}
+
+static int chksumc_digest(struct shash_desc *desc, const u8 *data, unsigned int length, u8 *out)
+{
+ struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
+
+ return __chksumc_finup(mctx->key, data, length, out);
+}
+
+static int chksum_cra_init(struct crypto_tfm *tfm)
+{
+ struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
+
+ mctx->key = 0;
+ return 0;
+}
+
+static int chksumc_cra_init(struct crypto_tfm *tfm)
+{
+ struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
+
+ mctx->key = ~0;
+ return 0;
+}
+
+static struct shash_alg crc32_alg = {
+ .digestsize = CHKSUM_DIGEST_SIZE,
+ .setkey = chksum_setkey,
+ .init = chksum_init,
+ .update = chksum_update,
+ .final = chksum_final,
+ .finup = chksum_finup,
+ .digest = chksum_digest,
+ .descsize = sizeof(struct chksum_desc_ctx),
+ .base = {
+ .cra_name = "crc32",
+ .cra_driver_name = "crc32-loongarch",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
+ .cra_blocksize = CHKSUM_BLOCK_SIZE,
+ .cra_alignmask = 0,
+ .cra_ctxsize = sizeof(struct chksum_ctx),
+ .cra_module = THIS_MODULE,
+ .cra_init = chksum_cra_init,
+ }
+};
+
+static struct shash_alg crc32c_alg = {
+ .digestsize = CHKSUM_DIGEST_SIZE,
+ .setkey = chksum_setkey,
+ .init = chksum_init,
+ .update = chksumc_update,
+ .final = chksumc_final,
+ .finup = chksumc_finup,
+ .digest = chksumc_digest,
+ .descsize = sizeof(struct chksum_desc_ctx),
+ .base = {
+ .cra_name = "crc32c",
+ .cra_driver_name = "crc32c-loongarch",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
+ .cra_blocksize = CHKSUM_BLOCK_SIZE,
+ .cra_alignmask = 0,
+ .cra_ctxsize = sizeof(struct chksum_ctx),
+ .cra_module = THIS_MODULE,
+ .cra_init = chksumc_cra_init,
+ }
+};
+
+static int __init crc32_mod_init(void)
+{
+ int err;
+
+ if (!cpu_has(CPU_FEATURE_CRC32))
+ return 0;
+
+ err = crypto_register_shash(&crc32_alg);
+ if (err)
+ return err;
+
+ err = crypto_register_shash(&crc32c_alg);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static void __exit crc32_mod_exit(void)
+{
+ if (!cpu_has(CPU_FEATURE_CRC32))
+ return;
+
+ crypto_unregister_shash(&crc32_alg);
+ crypto_unregister_shash(&crc32c_alg);
+}
+
+module_init(crc32_mod_init);
+module_exit(crc32_mod_exit);
+
+MODULE_AUTHOR("Min Zhou <zhoumin@loongson.cn>");
+MODULE_AUTHOR("Huacai Chen <chenhuacai@loongson.cn>");
+MODULE_DESCRIPTION("CRC32 and CRC32C using LoongArch crc* instructions");
+MODULE_LICENSE("GPL v2");
diff --git a/arch/loongarch/include/asm/acpi.h b/arch/loongarch/include/asm/acpi.h
index 4198753aa1d0..976a810352c6 100644
--- a/arch/loongarch/include/asm/acpi.h
+++ b/arch/loongarch/include/asm/acpi.h
@@ -41,8 +41,11 @@ extern void loongarch_suspend_enter(void);
static inline unsigned long acpi_get_wakeup_address(void)
{
+#ifdef CONFIG_SUSPEND
extern void loongarch_wakeup_start(void);
return (unsigned long)loongarch_wakeup_start;
+#endif
+ return 0UL;
}
#endif /* _ASM_LOONGARCH_ACPI_H */
diff --git a/arch/loongarch/include/asm/addrspace.h b/arch/loongarch/include/asm/addrspace.h
index 8fb699b4d40a..5c9c03bdf915 100644
--- a/arch/loongarch/include/asm/addrspace.h
+++ b/arch/loongarch/include/asm/addrspace.h
@@ -71,9 +71,9 @@ extern unsigned long vm_map_base;
#define _ATYPE32_ int
#define _ATYPE64_ __s64
#ifdef CONFIG_64BIT
-#define _CONST64_(x) x ## L
+#define _CONST64_(x) x ## UL
#else
-#define _CONST64_(x) x ## LL
+#define _CONST64_(x) x ## ULL
#endif
#endif
diff --git a/arch/loongarch/include/asm/bootinfo.h b/arch/loongarch/include/asm/bootinfo.h
index 0051b526ac6d..c60796869b2b 100644
--- a/arch/loongarch/include/asm/bootinfo.h
+++ b/arch/loongarch/include/asm/bootinfo.h
@@ -13,7 +13,6 @@ const char *get_system_type(void);
extern void init_environ(void);
extern void memblock_init(void);
extern void platform_init(void);
-extern void plat_swiotlb_setup(void);
extern int __init init_numa_memory(void);
struct loongson_board_info {
diff --git a/arch/loongarch/include/asm/checksum.h b/arch/loongarch/include/asm/checksum.h
new file mode 100644
index 000000000000..cabbf6af44c4
--- /dev/null
+++ b/arch/loongarch/include/asm/checksum.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2016 ARM Ltd.
+ * Copyright (C) 2023 Loongson Technology Corporation Limited
+ */
+#ifndef __ASM_CHECKSUM_H
+#define __ASM_CHECKSUM_H
+
+#include <linux/bitops.h>
+#include <linux/in6.h>
+
+#define _HAVE_ARCH_IPV6_CSUM
+__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+ const struct in6_addr *daddr,
+ __u32 len, __u8 proto, __wsum sum);
+
+/*
+ * turns a 32-bit partial checksum (e.g. from csum_partial) into a
+ * 1's complement 16-bit checksum.
+ */
+static inline __sum16 csum_fold(__wsum sum)
+{
+ u32 tmp = (__force u32)sum;
+
+ /*
+ * swap the two 16-bit halves of sum
+ * if there is a carry from adding the two 16-bit halves,
+ * it will carry from the lower half into the upper half,
+ * giving us the correct sum in the upper half.
+ */
+ return (__force __sum16)(~(tmp + rol32(tmp, 16)) >> 16);
+}
+#define csum_fold csum_fold
+
+/*
+ * This is a version of ip_compute_csum() optimized for IP headers,
+ * which always checksum on 4 octet boundaries. ihl is the number
+ * of 32-bit words and is always >= 5.
+ */
+static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
+{
+ u64 sum;
+ __uint128_t tmp;
+ int n = ihl; /* we want it signed */
+
+ tmp = *(const __uint128_t *)iph;
+ iph += 16;
+ n -= 4;
+ tmp += ((tmp >> 64) | (tmp << 64));
+ sum = tmp >> 64;
+ do {
+ sum += *(const u32 *)iph;
+ iph += 4;
+ } while (--n > 0);
+
+ sum += ror64(sum, 32);
+ return csum_fold((__force __wsum)(sum >> 32));
+}
+#define ip_fast_csum ip_fast_csum
+
+extern unsigned int do_csum(const unsigned char *buff, int len);
+#define do_csum do_csum
+
+#include <asm-generic/checksum.h>
+
+#endif /* __ASM_CHECKSUM_H */
diff --git a/arch/loongarch/include/asm/cmpxchg.h b/arch/loongarch/include/asm/cmpxchg.h
index ecfa6cf79806..979fde61bba8 100644
--- a/arch/loongarch/include/asm/cmpxchg.h
+++ b/arch/loongarch/include/asm/cmpxchg.h
@@ -62,7 +62,7 @@ static inline unsigned int __xchg_small(volatile void *ptr, unsigned int val,
}
static __always_inline unsigned long
-__xchg(volatile void *ptr, unsigned long x, int size)
+__arch_xchg(volatile void *ptr, unsigned long x, int size)
{
switch (size) {
case 1:
@@ -87,7 +87,7 @@ __xchg(volatile void *ptr, unsigned long x, int size)
__typeof__(*(ptr)) __res; \
\
__res = (__typeof__(*(ptr))) \
- __xchg((ptr), (unsigned long)(x), sizeof(*(ptr))); \
+ __arch_xchg((ptr), (unsigned long)(x), sizeof(*(ptr))); \
\
__res; \
})
diff --git a/arch/loongarch/include/asm/cpu-features.h b/arch/loongarch/include/asm/cpu-features.h
index b07974218393..f6177f133477 100644
--- a/arch/loongarch/include/asm/cpu-features.h
+++ b/arch/loongarch/include/asm/cpu-features.h
@@ -42,6 +42,7 @@
#define cpu_has_fpu cpu_opt(LOONGARCH_CPU_FPU)
#define cpu_has_lsx cpu_opt(LOONGARCH_CPU_LSX)
#define cpu_has_lasx cpu_opt(LOONGARCH_CPU_LASX)
+#define cpu_has_crc32 cpu_opt(LOONGARCH_CPU_CRC32)
#define cpu_has_complex cpu_opt(LOONGARCH_CPU_COMPLEX)
#define cpu_has_crypto cpu_opt(LOONGARCH_CPU_CRYPTO)
#define cpu_has_lvz cpu_opt(LOONGARCH_CPU_LVZ)
diff --git a/arch/loongarch/include/asm/cpu.h b/arch/loongarch/include/asm/cpu.h
index c3da91759472..88773d849e33 100644
--- a/arch/loongarch/include/asm/cpu.h
+++ b/arch/loongarch/include/asm/cpu.h
@@ -78,25 +78,26 @@ enum cpu_type_enum {
#define CPU_FEATURE_FPU 3 /* CPU has FPU */
#define CPU_FEATURE_LSX 4 /* CPU has LSX (128-bit SIMD) */
#define CPU_FEATURE_LASX 5 /* CPU has LASX (256-bit SIMD) */
-#define CPU_FEATURE_COMPLEX 6 /* CPU has Complex instructions */
-#define CPU_FEATURE_CRYPTO 7 /* CPU has Crypto instructions */
-#define CPU_FEATURE_LVZ 8 /* CPU has Virtualization extension */
-#define CPU_FEATURE_LBT_X86 9 /* CPU has X86 Binary Translation */
-#define CPU_FEATURE_LBT_ARM 10 /* CPU has ARM Binary Translation */
-#define CPU_FEATURE_LBT_MIPS 11 /* CPU has MIPS Binary Translation */
-#define CPU_FEATURE_TLB 12 /* CPU has TLB */
-#define CPU_FEATURE_CSR 13 /* CPU has CSR */
-#define CPU_FEATURE_WATCH 14 /* CPU has watchpoint registers */
-#define CPU_FEATURE_VINT 15 /* CPU has vectored interrupts */
-#define CPU_FEATURE_CSRIPI 16 /* CPU has CSR-IPI */
-#define CPU_FEATURE_EXTIOI 17 /* CPU has EXT-IOI */
-#define CPU_FEATURE_PREFETCH 18 /* CPU has prefetch instructions */
-#define CPU_FEATURE_PMP 19 /* CPU has perfermance counter */
-#define CPU_FEATURE_SCALEFREQ 20 /* CPU supports cpufreq scaling */
-#define CPU_FEATURE_FLATMODE 21 /* CPU has flat mode */
-#define CPU_FEATURE_EIODECODE 22 /* CPU has EXTIOI interrupt pin decode mode */
-#define CPU_FEATURE_GUESTID 23 /* CPU has GuestID feature */
-#define CPU_FEATURE_HYPERVISOR 24 /* CPU has hypervisor (running in VM) */
+#define CPU_FEATURE_CRC32 6 /* CPU has CRC32 instructions */
+#define CPU_FEATURE_COMPLEX 7 /* CPU has Complex instructions */
+#define CPU_FEATURE_CRYPTO 8 /* CPU has Crypto instructions */
+#define CPU_FEATURE_LVZ 9 /* CPU has Virtualization extension */
+#define CPU_FEATURE_LBT_X86 10 /* CPU has X86 Binary Translation */
+#define CPU_FEATURE_LBT_ARM 11 /* CPU has ARM Binary Translation */
+#define CPU_FEATURE_LBT_MIPS 12 /* CPU has MIPS Binary Translation */
+#define CPU_FEATURE_TLB 13 /* CPU has TLB */
+#define CPU_FEATURE_CSR 14 /* CPU has CSR */
+#define CPU_FEATURE_WATCH 15 /* CPU has watchpoint registers */
+#define CPU_FEATURE_VINT 16 /* CPU has vectored interrupts */
+#define CPU_FEATURE_CSRIPI 17 /* CPU has CSR-IPI */
+#define CPU_FEATURE_EXTIOI 18 /* CPU has EXT-IOI */
+#define CPU_FEATURE_PREFETCH 19 /* CPU has prefetch instructions */
+#define CPU_FEATURE_PMP 20 /* CPU has perfermance counter */
+#define CPU_FEATURE_SCALEFREQ 21 /* CPU supports cpufreq scaling */
+#define CPU_FEATURE_FLATMODE 22 /* CPU has flat mode */
+#define CPU_FEATURE_EIODECODE 23 /* CPU has EXTIOI interrupt pin decode mode */
+#define CPU_FEATURE_GUESTID 24 /* CPU has GuestID feature */
+#define CPU_FEATURE_HYPERVISOR 25 /* CPU has hypervisor (running in VM) */
#define LOONGARCH_CPU_CPUCFG BIT_ULL(CPU_FEATURE_CPUCFG)
#define LOONGARCH_CPU_LAM BIT_ULL(CPU_FEATURE_LAM)
@@ -104,6 +105,7 @@ enum cpu_type_enum {
#define LOONGARCH_CPU_FPU BIT_ULL(CPU_FEATURE_FPU)
#define LOONGARCH_CPU_LSX BIT_ULL(CPU_FEATURE_LSX)
#define LOONGARCH_CPU_LASX BIT_ULL(CPU_FEATURE_LASX)
+#define LOONGARCH_CPU_CRC32 BIT_ULL(CPU_FEATURE_CRC32)
#define LOONGARCH_CPU_COMPLEX BIT_ULL(CPU_FEATURE_COMPLEX)
#define LOONGARCH_CPU_CRYPTO BIT_ULL(CPU_FEATURE_CRYPTO)
#define LOONGARCH_CPU_LVZ BIT_ULL(CPU_FEATURE_LVZ)
diff --git a/arch/loongarch/include/asm/fpu.h b/arch/loongarch/include/asm/fpu.h
index 358b254d9c1d..192f8e35d912 100644
--- a/arch/loongarch/include/asm/fpu.h
+++ b/arch/loongarch/include/asm/fpu.h
@@ -21,6 +21,9 @@
struct sigcontext;
+extern void kernel_fpu_begin(void);
+extern void kernel_fpu_end(void);
+
extern void _init_fpu(unsigned int);
extern void _save_fp(struct loongarch_fpu *);
extern void _restore_fp(struct loongarch_fpu *);
diff --git a/arch/loongarch/include/asm/ftrace.h b/arch/loongarch/include/asm/ftrace.h
index 3418d32d4fc7..23e2ba78dcb0 100644
--- a/arch/loongarch/include/asm/ftrace.h
+++ b/arch/loongarch/include/asm/ftrace.h
@@ -54,9 +54,46 @@ static __always_inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *
return &fregs->regs;
}
+static __always_inline unsigned long
+ftrace_regs_get_instruction_pointer(struct ftrace_regs *fregs)
+{
+ return instruction_pointer(&fregs->regs);
+}
+
+static __always_inline void
+ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs, unsigned long ip)
+{
+ regs_set_return_value(&fregs->regs, ip);
+}
+
+#define ftrace_regs_get_argument(fregs, n) \
+ regs_get_kernel_argument(&(fregs)->regs, n)
+#define ftrace_regs_get_stack_pointer(fregs) \
+ kernel_stack_pointer(&(fregs)->regs)
+#define ftrace_regs_return_value(fregs) \
+ regs_return_value(&(fregs)->regs)
+#define ftrace_regs_set_return_value(fregs, ret) \
+ regs_set_return_value(&(fregs)->regs, ret)
+#define ftrace_override_function_with_return(fregs) \
+ override_function_with_return(&(fregs)->regs)
+#define ftrace_regs_query_register_offset(name) \
+ regs_query_register_offset(name)
+
#define ftrace_graph_func ftrace_graph_func
void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct ftrace_regs *fregs);
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+static inline void
+__arch_ftrace_set_direct_caller(struct pt_regs *regs, unsigned long addr)
+{
+ regs->regs[13] = addr; /* t1 */
+}
+
+#define arch_ftrace_set_direct_caller(fregs, addr) \
+ __arch_ftrace_set_direct_caller(&(fregs)->regs, addr)
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
+
#endif
#endif /* __ASSEMBLY__ */
diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h
index a04fe755d719..b09887ffcd15 100644
--- a/arch/loongarch/include/asm/inst.h
+++ b/arch/loongarch/include/asm/inst.h
@@ -121,6 +121,8 @@ enum reg2bstrd_op {
};
enum reg3_op {
+ asrtle_op = 0x02,
+ asrtgt_op = 0x03,
addw_op = 0x20,
addd_op = 0x21,
subw_op = 0x22,
@@ -176,6 +178,30 @@ enum reg3_op {
amord_op = 0x70c7,
amxorw_op = 0x70c8,
amxord_op = 0x70c9,
+ fldgts_op = 0x70e8,
+ fldgtd_op = 0x70e9,
+ fldles_op = 0x70ea,
+ fldled_op = 0x70eb,
+ fstgts_op = 0x70ec,
+ fstgtd_op = 0x70ed,
+ fstles_op = 0x70ee,
+ fstled_op = 0x70ef,
+ ldgtb_op = 0x70f0,
+ ldgth_op = 0x70f1,
+ ldgtw_op = 0x70f2,
+ ldgtd_op = 0x70f3,
+ ldleb_op = 0x70f4,
+ ldleh_op = 0x70f5,
+ ldlew_op = 0x70f6,
+ ldled_op = 0x70f7,
+ stgtb_op = 0x70f8,
+ stgth_op = 0x70f9,
+ stgtw_op = 0x70fa,
+ stgtd_op = 0x70fb,
+ stleb_op = 0x70fc,
+ stleh_op = 0x70fd,
+ stlew_op = 0x70fe,
+ stled_op = 0x70ff,
};
enum reg3sa2_op {
diff --git a/arch/loongarch/include/asm/io.h b/arch/loongarch/include/asm/io.h
index 402a7d9e3a53..545e2708fbf7 100644
--- a/arch/loongarch/include/asm/io.h
+++ b/arch/loongarch/include/asm/io.h
@@ -54,8 +54,10 @@ static inline void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size,
* @offset: bus address of the memory
* @size: size of the resource to map
*/
+extern pgprot_t pgprot_wc;
+
#define ioremap_wc(offset, size) \
- ioremap_prot((offset), (size), pgprot_val(PAGE_KERNEL_WUC))
+ ioremap_prot((offset), (size), pgprot_val(pgprot_wc))
#define ioremap_cache(offset, size) \
ioremap_prot((offset), (size), pgprot_val(PAGE_KERNEL))
diff --git a/arch/loongarch/include/asm/local.h b/arch/loongarch/include/asm/local.h
index 65fbbae9fc4d..83e995b30e47 100644
--- a/arch/loongarch/include/asm/local.h
+++ b/arch/loongarch/include/asm/local.h
@@ -56,8 +56,17 @@ static inline long local_sub_return(long i, local_t *l)
return result;
}
-#define local_cmpxchg(l, o, n) \
- ((long)cmpxchg_local(&((l)->a.counter), (o), (n)))
+static inline long local_cmpxchg(local_t *l, long old, long new)
+{
+ return cmpxchg_local(&l->a.counter, old, new);
+}
+
+static inline bool local_try_cmpxchg(local_t *l, long *old, long new)
+{
+ typeof(l->a.counter) *__old = (typeof(l->a.counter) *) old;
+ return try_cmpxchg_local(&l->a.counter, __old, new);
+}
+
#define local_xchg(l, n) (atomic_long_xchg((&(l)->a), (n)))
/**
diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
index 65b7dcdea16d..b3323ab5b78d 100644
--- a/arch/loongarch/include/asm/loongarch.h
+++ b/arch/loongarch/include/asm/loongarch.h
@@ -117,7 +117,7 @@ static inline u32 read_cpucfg(u32 reg)
#define CPUCFG1_EP BIT(22)
#define CPUCFG1_RPLV BIT(23)
#define CPUCFG1_HUGEPG BIT(24)
-#define CPUCFG1_IOCSRBRD BIT(25)
+#define CPUCFG1_CRC32 BIT(25)
#define CPUCFG1_MSGINT BIT(26)
#define LOONGARCH_CPUCFG2 0x2
@@ -311,8 +311,8 @@ static __always_inline void iocsr_write64(u64 val, u32 reg)
#define CSR_ECFG_VS_WIDTH 3
#define CSR_ECFG_VS (_ULCAST_(0x7) << CSR_ECFG_VS_SHIFT)
#define CSR_ECFG_IM_SHIFT 0
-#define CSR_ECFG_IM_WIDTH 13
-#define CSR_ECFG_IM (_ULCAST_(0x1fff) << CSR_ECFG_IM_SHIFT)
+#define CSR_ECFG_IM_WIDTH 14
+#define CSR_ECFG_IM (_ULCAST_(0x3fff) << CSR_ECFG_IM_SHIFT)
#define LOONGARCH_CSR_ESTAT 0x5 /* Exception status */
#define CSR_ESTAT_ESUBCODE_SHIFT 22
@@ -322,8 +322,8 @@ static __always_inline void iocsr_write64(u64 val, u32 reg)
#define CSR_ESTAT_EXC_WIDTH 6
#define CSR_ESTAT_EXC (_ULCAST_(0x3f) << CSR_ESTAT_EXC_SHIFT)
#define CSR_ESTAT_IS_SHIFT 0
-#define CSR_ESTAT_IS_WIDTH 15
-#define CSR_ESTAT_IS (_ULCAST_(0x7fff) << CSR_ESTAT_IS_SHIFT)
+#define CSR_ESTAT_IS_WIDTH 14
+#define CSR_ESTAT_IS (_ULCAST_(0x3fff) << CSR_ESTAT_IS_SHIFT)
#define LOONGARCH_CSR_ERA 0x6 /* ERA */
@@ -423,9 +423,9 @@ static __always_inline void iocsr_write64(u64 val, u32 reg)
#define CSR_ASID_ASID_WIDTH 10
#define CSR_ASID_ASID (_ULCAST_(0x3ff) << CSR_ASID_ASID_SHIFT)
-#define LOONGARCH_CSR_PGDL 0x19 /* Page table base address when VA[47] = 0 */
+#define LOONGARCH_CSR_PGDL 0x19 /* Page table base address when VA[VALEN-1] = 0 */
-#define LOONGARCH_CSR_PGDH 0x1a /* Page table base address when VA[47] = 1 */
+#define LOONGARCH_CSR_PGDH 0x1a /* Page table base address when VA[VALEN-1] = 1 */
#define LOONGARCH_CSR_PGD 0x1b /* Page table base */
@@ -1090,7 +1090,7 @@ static __always_inline void iocsr_write64(u64 val, u32 reg)
#define ECFGF_IPI (_ULCAST_(1) << ECFGB_IPI)
#define ECFGF(hwirq) (_ULCAST_(1) << hwirq)
-#define ESTATF_IP 0x00001fff
+#define ESTATF_IP 0x00003fff
#define LOONGARCH_IOCSR_FEATURES 0x8
#define IOCSRF_TEMP BIT_ULL(0)
@@ -1397,7 +1397,7 @@ __BUILD_CSR_OP(tlbidx)
#define EXSUBCODE_ADEF 0 /* Fetch Instruction */
#define EXSUBCODE_ADEM 1 /* Access Memory*/
#define EXCCODE_ALE 9 /* Unalign Access */
-#define EXCCODE_OOB 10 /* Out of bounds */
+#define EXCCODE_BCE 10 /* Bounds Check Error */
#define EXCCODE_SYS 11 /* System call */
#define EXCCODE_BP 12 /* Breakpoint */
#define EXCCODE_INE 13 /* Inst. Not Exist */
@@ -1408,33 +1408,38 @@ __BUILD_CSR_OP(tlbidx)
#define EXCCODE_FPE 18 /* Floating Point Exception */
#define EXCSUBCODE_FPE 0 /* Floating Point Exception */
#define EXCSUBCODE_VFPE 1 /* Vector Exception */
-#define EXCCODE_WATCH 19 /* Watch address reference */
+#define EXCCODE_WATCH 19 /* WatchPoint Exception */
+ #define EXCSUBCODE_WPEF 0 /* ... on Instruction Fetch */
+ #define EXCSUBCODE_WPEM 1 /* ... on Memory Accesses */
#define EXCCODE_BTDIS 20 /* Binary Trans. Disabled */
#define EXCCODE_BTE 21 /* Binary Trans. Exception */
-#define EXCCODE_PSI 22 /* Guest Privileged Error */
-#define EXCCODE_HYP 23 /* Hypercall */
+#define EXCCODE_GSPR 22 /* Guest Privileged Error */
+#define EXCCODE_HVC 23 /* Hypercall */
#define EXCCODE_GCM 24 /* Guest CSR modified */
#define EXCSUBCODE_GCSC 0 /* Software caused */
#define EXCSUBCODE_GCHC 1 /* Hardware caused */
#define EXCCODE_SE 25 /* Security */
-#define EXCCODE_INT_START 64
-#define EXCCODE_SIP0 64
-#define EXCCODE_SIP1 65
-#define EXCCODE_IP0 66
-#define EXCCODE_IP1 67
-#define EXCCODE_IP2 68
-#define EXCCODE_IP3 69
-#define EXCCODE_IP4 70
-#define EXCCODE_IP5 71
-#define EXCCODE_IP6 72
-#define EXCCODE_IP7 73
-#define EXCCODE_PMC 74 /* Performance Counter */
-#define EXCCODE_TIMER 75
-#define EXCCODE_IPI 76
-#define EXCCODE_NMI 77
-#define EXCCODE_INT_END 78
-#define EXCCODE_INT_NUM (EXCCODE_INT_END - EXCCODE_INT_START)
+/* Interrupt numbers */
+#define INT_SWI0 0 /* Software Interrupts */
+#define INT_SWI1 1
+#define INT_HWI0 2 /* Hardware Interrupts */
+#define INT_HWI1 3
+#define INT_HWI2 4
+#define INT_HWI3 5
+#define INT_HWI4 6
+#define INT_HWI5 7
+#define INT_HWI6 8
+#define INT_HWI7 9
+#define INT_PCOV 10 /* Performance Counter Overflow */
+#define INT_TI 11 /* Timer */
+#define INT_IPI 12
+#define INT_NMI 13
+
+/* ExcCodes corresponding to interrupts */
+#define EXCCODE_INT_NUM (INT_NMI + 1)
+#define EXCCODE_INT_START 64
+#define EXCCODE_INT_END (EXCCODE_INT_START + EXCCODE_INT_NUM - 1)
/* FPU register names */
#define LOONGARCH_FCSR0 $r0
diff --git a/arch/loongarch/include/asm/module.lds.h b/arch/loongarch/include/asm/module.lds.h
index 438f09d4ccf4..88554f92e010 100644
--- a/arch/loongarch/include/asm/module.lds.h
+++ b/arch/loongarch/include/asm/module.lds.h
@@ -2,8 +2,8 @@
/* Copyright (C) 2020-2022 Loongson Technology Corporation Limited */
SECTIONS {
. = ALIGN(4);
- .got : { BYTE(0) }
- .plt : { BYTE(0) }
- .plt.idx : { BYTE(0) }
- .ftrace_trampoline : { BYTE(0) }
+ .got 0 : { BYTE(0) }
+ .plt 0 : { BYTE(0) }
+ .plt.idx 0 : { BYTE(0) }
+ .ftrace_trampoline 0 : { BYTE(0) }
}
diff --git a/arch/loongarch/include/asm/ptrace.h b/arch/loongarch/include/asm/ptrace.h
index d761db943335..35f0958163ac 100644
--- a/arch/loongarch/include/asm/ptrace.h
+++ b/arch/loongarch/include/asm/ptrace.h
@@ -154,6 +154,11 @@ static inline long regs_return_value(struct pt_regs *regs)
return regs->regs[4];
}
+static inline void regs_set_return_value(struct pt_regs *regs, unsigned long val)
+{
+ regs->regs[4] = val;
+}
+
#define instruction_pointer(regs) ((regs)->csr_era)
#define profile_pc(regs) instruction_pointer(regs)
diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h
index d82687390b4a..416b653bccb4 100644
--- a/arch/loongarch/include/asm/smp.h
+++ b/arch/loongarch/include/asm/smp.h
@@ -99,7 +99,7 @@ static inline void __cpu_die(unsigned int cpu)
loongson_cpu_die(cpu);
}
-extern void play_dead(void);
+extern void __noreturn play_dead(void);
#endif
#endif /* __ASM_SMP_H */
diff --git a/arch/loongarch/include/uapi/asm/ptrace.h b/arch/loongarch/include/uapi/asm/ptrace.h
index cc48ed262021..82d811b5c6e9 100644
--- a/arch/loongarch/include/uapi/asm/ptrace.h
+++ b/arch/loongarch/include/uapi/asm/ptrace.h
@@ -47,11 +47,12 @@ struct user_fp_state {
};
struct user_watch_state {
- uint16_t dbg_info;
+ uint64_t dbg_info;
struct {
uint64_t addr;
uint64_t mask;
uint32_t ctrl;
+ uint32_t pad;
} dbg_regs[8];
};
diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
index 78d4e3384305..9a72d91cd104 100644
--- a/arch/loongarch/kernel/Makefile
+++ b/arch/loongarch/kernel/Makefile
@@ -13,7 +13,7 @@ obj-y += head.o cpu-probe.o cacheinfo.o env.o setup.o entry.o genex.o \
obj-$(CONFIG_ACPI) += acpi.o
obj-$(CONFIG_EFI) += efi.o
-obj-$(CONFIG_CPU_HAS_FPU) += fpu.o
+obj-$(CONFIG_CPU_HAS_FPU) += fpu.o kfpu.o
obj-$(CONFIG_ARCH_STRICT_ALIGN) += unaligned.o
diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c
index 3a3fce2d7846..5adf0f736c6d 100644
--- a/arch/loongarch/kernel/cpu-probe.c
+++ b/arch/loongarch/kernel/cpu-probe.c
@@ -60,7 +60,7 @@ static inline void set_elf_platform(int cpu, const char *plat)
/* MAP BASE */
unsigned long vm_map_base;
-EXPORT_SYMBOL_GPL(vm_map_base);
+EXPORT_SYMBOL(vm_map_base);
static void cpu_probe_addrbits(struct cpuinfo_loongarch *c)
{
@@ -94,13 +94,18 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c)
c->options = LOONGARCH_CPU_CPUCFG | LOONGARCH_CPU_CSR |
LOONGARCH_CPU_TLB | LOONGARCH_CPU_VINT | LOONGARCH_CPU_WATCH;
- elf_hwcap = HWCAP_LOONGARCH_CPUCFG | HWCAP_LOONGARCH_CRC32;
+ elf_hwcap = HWCAP_LOONGARCH_CPUCFG;
config = read_cpucfg(LOONGARCH_CPUCFG1);
if (config & CPUCFG1_UAL) {
c->options |= LOONGARCH_CPU_UAL;
elf_hwcap |= HWCAP_LOONGARCH_UAL;
}
+ if (config & CPUCFG1_CRC32) {
+ c->options |= LOONGARCH_CPU_CRC32;
+ elf_hwcap |= HWCAP_LOONGARCH_CRC32;
+ }
+
config = read_cpucfg(LOONGARCH_CPUCFG2);
if (config & CPUCFG2_LAM) {
diff --git a/arch/loongarch/kernel/ftrace_dyn.c b/arch/loongarch/kernel/ftrace_dyn.c
index 4a3ef8516ccc..73858c9029cc 100644
--- a/arch/loongarch/kernel/ftrace_dyn.c
+++ b/arch/loongarch/kernel/ftrace_dyn.c
@@ -30,19 +30,12 @@ static int ftrace_modify_code(unsigned long pc, u32 old, u32 new, bool validate)
return 0;
}
-#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
-
#ifdef CONFIG_MODULES
-static inline int __get_mod(struct module **mod, unsigned long addr)
+static bool reachable_by_bl(unsigned long addr, unsigned long pc)
{
- preempt_disable();
- *mod = __module_text_address(addr);
- preempt_enable();
+ long offset = (long)addr - (long)pc;
- if (WARN_ON(!(*mod)))
- return -EINVAL;
-
- return 0;
+ return offset >= -SZ_128M && offset < SZ_128M;
}
static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr)
@@ -58,51 +51,88 @@ static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr)
return NULL;
}
-static unsigned long get_plt_addr(struct module *mod, unsigned long addr)
+/*
+ * Find the address the callsite must branch to in order to reach '*addr'.
+ *
+ * Due to the limited range of 'bl' instruction, modules may be placed too far
+ * away to branch directly and we must use a PLT.
+ *
+ * Returns true when '*addr' contains a reachable target address, or has been
+ * modified to contain a PLT address. Returns false otherwise.
+ */
+static bool ftrace_find_callable_addr(struct dyn_ftrace *rec, struct module *mod, unsigned long *addr)
{
+ unsigned long pc = rec->ip + LOONGARCH_INSN_SIZE;
struct plt_entry *plt;
- plt = get_ftrace_plt(mod, addr);
+ /*
+ * If a custom trampoline is unreachable, rely on the ftrace_regs_caller
+ * trampoline which knows how to indirectly reach that trampoline through
+ * ops->direct_call.
+ */
+ if (*addr != FTRACE_ADDR && *addr != FTRACE_REGS_ADDR && !reachable_by_bl(*addr, pc))
+ *addr = FTRACE_REGS_ADDR;
+
+ /*
+ * When the target is within range of the 'bl' instruction, use 'addr'
+ * as-is and branch to that directly.
+ */
+ if (reachable_by_bl(*addr, pc))
+ return true;
+
+ /*
+ * 'mod' is only set at module load time, but if we end up
+ * dealing with an out-of-range condition, we can assume it
+ * is due to a module being loaded far away from the kernel.
+ *
+ * NOTE: __module_text_address() must be called with preemption
+ * disabled, but we can rely on ftrace_lock to ensure that 'mod'
+ * retains its validity throughout the remainder of this code.
+ */
+ if (!mod) {
+ preempt_disable();
+ mod = __module_text_address(pc);
+ preempt_enable();
+ }
+
+ if (WARN_ON(!mod))
+ return false;
+
+ plt = get_ftrace_plt(mod, *addr);
if (!plt) {
- pr_err("ftrace: no module PLT for %ps\n", (void *)addr);
- return -EINVAL;
+ pr_err("ftrace: no module PLT for %ps\n", (void *)*addr);
+ return false;
}
- return (unsigned long)plt;
+ *addr = (unsigned long)plt;
+ return true;
+}
+#else /* !CONFIG_MODULES */
+static bool ftrace_find_callable_addr(struct dyn_ftrace *rec, struct module *mod, unsigned long *addr)
+{
+ return true;
}
#endif
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr)
{
u32 old, new;
unsigned long pc;
- long offset __maybe_unused;
pc = rec->ip + LOONGARCH_INSN_SIZE;
-#ifdef CONFIG_MODULES
- offset = (long)pc - (long)addr;
-
- if (offset < -SZ_128M || offset >= SZ_128M) {
- int ret;
- struct module *mod;
-
- ret = __get_mod(&mod, pc);
- if (ret)
- return ret;
-
- addr = get_plt_addr(mod, addr);
+ if (!ftrace_find_callable_addr(rec, NULL, &addr))
+ return -EINVAL;
- old_addr = get_plt_addr(mod, old_addr);
- }
-#endif
+ if (!ftrace_find_callable_addr(rec, NULL, &old_addr))
+ return -EINVAL;
new = larch_insn_gen_bl(pc, addr);
old = larch_insn_gen_bl(pc, old_addr);
return ftrace_modify_code(pc, old, new, true);
}
-
#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
int ftrace_update_ftrace_func(ftrace_func_t func)
@@ -153,24 +183,11 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
u32 old, new;
unsigned long pc;
- long offset __maybe_unused;
pc = rec->ip + LOONGARCH_INSN_SIZE;
-#ifdef CONFIG_MODULES
- offset = (long)pc - (long)addr;
-
- if (offset < -SZ_128M || offset >= SZ_128M) {
- int ret;
- struct module *mod;
-
- ret = __get_mod(&mod, pc);
- if (ret)
- return ret;
-
- addr = get_plt_addr(mod, addr);
- }
-#endif
+ if (!ftrace_find_callable_addr(rec, NULL, &addr))
+ return -EINVAL;
old = larch_insn_gen_nop();
new = larch_insn_gen_bl(pc, addr);
@@ -182,24 +199,11 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long ad
{
u32 old, new;
unsigned long pc;
- long offset __maybe_unused;
pc = rec->ip + LOONGARCH_INSN_SIZE;
-#ifdef CONFIG_MODULES
- offset = (long)pc - (long)addr;
-
- if (offset < -SZ_128M || offset >= SZ_128M) {
- int ret;
- struct module *mod;
-
- ret = __get_mod(&mod, pc);
- if (ret)
- return ret;
-
- addr = get_plt_addr(mod, addr);
- }
-#endif
+ if (!ftrace_find_callable_addr(rec, NULL, &addr))
+ return -EINVAL;
new = larch_insn_gen_nop();
old = larch_insn_gen_bl(pc, addr);
diff --git a/arch/loongarch/kernel/genex.S b/arch/loongarch/kernel/genex.S
index 44ff1ff64260..78f066384657 100644
--- a/arch/loongarch/kernel/genex.S
+++ b/arch/loongarch/kernel/genex.S
@@ -82,6 +82,7 @@ SYM_FUNC_END(except_vec_cex)
BUILD_HANDLER ade ade badv
BUILD_HANDLER ale ale badv
+ BUILD_HANDLER bce bce none
BUILD_HANDLER bp bp none
BUILD_HANDLER fpe fpe fcsr
BUILD_HANDLER fpu fpu none
diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c
index 0524bf1169b7..883e5066ae44 100644
--- a/arch/loongarch/kernel/irq.c
+++ b/arch/loongarch/kernel/irq.c
@@ -92,7 +92,7 @@ static int __init get_ipi_irq(void)
struct irq_domain *d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY);
if (d)
- return irq_create_mapping(d, EXCCODE_IPI - EXCCODE_INT_START);
+ return irq_create_mapping(d, INT_IPI);
return -EINVAL;
}
diff --git a/arch/loongarch/kernel/kfpu.c b/arch/loongarch/kernel/kfpu.c
new file mode 100644
index 000000000000..5c46ae8c6cac
--- /dev/null
+++ b/arch/loongarch/kernel/kfpu.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023 Loongson Technology Corporation Limited
+ */
+
+#include <linux/cpu.h>
+#include <linux/init.h>
+#include <asm/fpu.h>
+#include <asm/smp.h>
+
+static DEFINE_PER_CPU(bool, in_kernel_fpu);
+
+void kernel_fpu_begin(void)
+{
+ preempt_disable();
+
+ WARN_ON(this_cpu_read(in_kernel_fpu));
+
+ this_cpu_write(in_kernel_fpu, true);
+
+ if (!is_fpu_owner())
+ enable_fpu();
+ else
+ _save_fp(&current->thread.fpu);
+
+ write_fcsr(LOONGARCH_FCSR0, 0);
+}
+EXPORT_SYMBOL_GPL(kernel_fpu_begin);
+
+void kernel_fpu_end(void)
+{
+ WARN_ON(!this_cpu_read(in_kernel_fpu));
+
+ if (!is_fpu_owner())
+ disable_fpu();
+ else
+ _restore_fp(&current->thread.fpu);
+
+ this_cpu_write(in_kernel_fpu, false);
+
+ preempt_enable();
+}
+EXPORT_SYMBOL_GPL(kernel_fpu_end);
diff --git a/arch/loongarch/kernel/mcount_dyn.S b/arch/loongarch/kernel/mcount_dyn.S
index bbabf06244c2..c7d961fc72c2 100644
--- a/arch/loongarch/kernel/mcount_dyn.S
+++ b/arch/loongarch/kernel/mcount_dyn.S
@@ -42,7 +42,6 @@
.if \allregs
PTR_S tp, sp, PT_R2
PTR_S t0, sp, PT_R12
- PTR_S t1, sp, PT_R13
PTR_S t2, sp, PT_R14
PTR_S t3, sp, PT_R15
PTR_S t4, sp, PT_R16
@@ -64,6 +63,8 @@
PTR_S zero, sp, PT_R0
.endif
PTR_S ra, sp, PT_ERA /* Save trace function ra at PT_ERA */
+ move t1, zero
+ PTR_S t1, sp, PT_R13
PTR_ADDI t8, sp, PT_SIZE
PTR_S t8, sp, PT_R3
.endm
@@ -104,8 +105,12 @@ ftrace_common_return:
PTR_L a7, sp, PT_R11
PTR_L fp, sp, PT_R22
PTR_L t0, sp, PT_ERA
+ PTR_L t1, sp, PT_R13
PTR_ADDI sp, sp, PT_SIZE
+ bnez t1, .Ldirect
jr t0
+.Ldirect:
+ jr t1
SYM_CODE_END(ftrace_common)
SYM_CODE_START(ftrace_caller)
@@ -147,3 +152,9 @@ SYM_CODE_START(return_to_handler)
jr ra
SYM_CODE_END(return_to_handler)
#endif
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+SYM_CODE_START(ftrace_stub_direct_tramp)
+ jr t0
+SYM_CODE_END(ftrace_stub_direct_tramp)
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
diff --git a/arch/loongarch/kernel/perf_event.c b/arch/loongarch/kernel/perf_event.c
index 707bd32e5c4f..ff28f99b47d7 100644
--- a/arch/loongarch/kernel/perf_event.c
+++ b/arch/loongarch/kernel/perf_event.c
@@ -461,7 +461,7 @@ static int get_pmc_irq(void)
struct irq_domain *d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY);
if (d)
- return irq_create_mapping(d, EXCCODE_PMC - EXCCODE_INT_START);
+ return irq_create_mapping(d, INT_PCOV);
return -EINVAL;
}
diff --git a/arch/loongarch/kernel/proc.c b/arch/loongarch/kernel/proc.c
index 5c67cc4fd56d..0d82907b5404 100644
--- a/arch/loongarch/kernel/proc.c
+++ b/arch/loongarch/kernel/proc.c
@@ -76,6 +76,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
if (cpu_has_fpu) seq_printf(m, " fpu");
if (cpu_has_lsx) seq_printf(m, " lsx");
if (cpu_has_lasx) seq_printf(m, " lasx");
+ if (cpu_has_crc32) seq_printf(m, " crc32");
if (cpu_has_complex) seq_printf(m, " complex");
if (cpu_has_crypto) seq_printf(m, " crypto");
if (cpu_has_lvz) seq_printf(m, " lvz");
diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c
index fa2443c7afb2..b71e17c1cc0c 100644
--- a/arch/loongarch/kernel/process.c
+++ b/arch/loongarch/kernel/process.c
@@ -62,7 +62,7 @@ unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
EXPORT_SYMBOL(boot_option_idle_override);
#ifdef CONFIG_HOTPLUG_CPU
-void arch_cpu_idle_dead(void)
+void __noreturn arch_cpu_idle_dead(void)
{
play_dead();
}
diff --git a/arch/loongarch/kernel/ptrace.c b/arch/loongarch/kernel/ptrace.c
index 06bceae7d104..5fcffb452367 100644
--- a/arch/loongarch/kernel/ptrace.c
+++ b/arch/loongarch/kernel/ptrace.c
@@ -391,10 +391,10 @@ static int ptrace_hbp_fill_attr_ctrl(unsigned int note_type,
return 0;
}
-static int ptrace_hbp_get_resource_info(unsigned int note_type, u16 *info)
+static int ptrace_hbp_get_resource_info(unsigned int note_type, u64 *info)
{
u8 num;
- u16 reg = 0;
+ u64 reg = 0;
switch (note_type) {
case NT_LOONGARCH_HW_BREAK:
@@ -524,15 +524,16 @@ static int ptrace_hbp_set_addr(unsigned int note_type,
return modify_user_hw_breakpoint(bp, &attr);
}
-#define PTRACE_HBP_CTRL_SZ sizeof(u32)
#define PTRACE_HBP_ADDR_SZ sizeof(u64)
#define PTRACE_HBP_MASK_SZ sizeof(u64)
+#define PTRACE_HBP_CTRL_SZ sizeof(u32)
+#define PTRACE_HBP_PAD_SZ sizeof(u32)
static int hw_break_get(struct task_struct *target,
const struct user_regset *regset,
struct membuf to)
{
- u16 info;
+ u64 info;
u32 ctrl;
u64 addr, mask;
int ret, idx = 0;
@@ -545,7 +546,7 @@ static int hw_break_get(struct task_struct *target,
membuf_write(&to, &info, sizeof(info));
- /* (address, ctrl) registers */
+ /* (address, mask, ctrl) registers */
while (to.left) {
ret = ptrace_hbp_get_addr(note_type, target, idx, &addr);
if (ret)
@@ -562,6 +563,7 @@ static int hw_break_get(struct task_struct *target,
membuf_store(&to, addr);
membuf_store(&to, mask);
membuf_store(&to, ctrl);
+ membuf_zero(&to, sizeof(u32));
idx++;
}
@@ -582,7 +584,7 @@ static int hw_break_set(struct task_struct *target,
offset = offsetof(struct user_watch_state, dbg_regs);
user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, 0, offset);
- /* (address, ctrl) registers */
+ /* (address, mask, ctrl) registers */
limit = regset->n * regset->size;
while (count && offset < limit) {
if (count < PTRACE_HBP_ADDR_SZ)
@@ -602,7 +604,7 @@ static int hw_break_set(struct task_struct *target,
break;
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &mask,
- offset, offset + PTRACE_HBP_ADDR_SZ);
+ offset, offset + PTRACE_HBP_MASK_SZ);
if (ret)
return ret;
@@ -611,8 +613,8 @@ static int hw_break_set(struct task_struct *target,
return ret;
offset += PTRACE_HBP_MASK_SZ;
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &mask,
- offset, offset + PTRACE_HBP_MASK_SZ);
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl,
+ offset, offset + PTRACE_HBP_CTRL_SZ);
if (ret)
return ret;
@@ -620,6 +622,11 @@ static int hw_break_set(struct task_struct *target,
if (ret)
return ret;
offset += PTRACE_HBP_CTRL_SZ;
+
+ user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+ offset, offset + PTRACE_HBP_PAD_SZ);
+ offset += PTRACE_HBP_PAD_SZ;
+
idx++;
}
diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
index bae84ccf6d36..4444b13418f0 100644
--- a/arch/loongarch/kernel/setup.c
+++ b/arch/loongarch/kernel/setup.c
@@ -160,6 +160,27 @@ static void __init smbios_parse(void)
dmi_walk(find_tokens, NULL);
}
+#ifdef CONFIG_ARCH_WRITECOMBINE
+pgprot_t pgprot_wc = PAGE_KERNEL_WUC;
+#else
+pgprot_t pgprot_wc = PAGE_KERNEL_SUC;
+#endif
+
+EXPORT_SYMBOL(pgprot_wc);
+
+static int __init setup_writecombine(char *p)
+{
+ if (!strcmp(p, "on"))
+ pgprot_wc = PAGE_KERNEL_WUC;
+ else if (!strcmp(p, "off"))
+ pgprot_wc = PAGE_KERNEL_SUC;
+ else
+ pr_warn("Unknown writecombine setting \"%s\".\n", p);
+
+ return 0;
+}
+early_param("writecombine", setup_writecombine);
+
static int usermem __initdata;
static int __init early_parse_mem(char *p)
@@ -368,8 +389,8 @@ static void __init arch_mem_init(char **cmdline_p)
/*
* In order to reduce the possibility of kernel panic when failed to
* get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate
- * low memory as small as possible before plat_swiotlb_setup(), so
- * make sparse_init() using top-down allocation.
+ * low memory as small as possible before swiotlb_init(), so make
+ * sparse_init() using top-down allocation.
*/
memblock_set_bottom_up(false);
sparse_init();
diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
index 8c6e227cb29d..ed167e244cda 100644
--- a/arch/loongarch/kernel/smp.c
+++ b/arch/loongarch/kernel/smp.c
@@ -155,11 +155,11 @@ void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action)
* it goes straight through and wastes no time serializing
* anything. Worst case is that we lose a reschedule ...
*/
-void smp_send_reschedule(int cpu)
+void arch_smp_send_reschedule(int cpu)
{
loongson_send_ipi_single(cpu, SMP_RESCHEDULE);
}
-EXPORT_SYMBOL_GPL(smp_send_reschedule);
+EXPORT_SYMBOL_GPL(arch_smp_send_reschedule);
irqreturn_t loongson_ipi_interrupt(int irq, void *dev)
{
@@ -336,7 +336,7 @@ void play_dead(void)
iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_CLEAR);
init_fn();
- unreachable();
+ BUG();
}
#endif
diff --git a/arch/loongarch/kernel/stacktrace.c b/arch/loongarch/kernel/stacktrace.c
index 3a690f96f00c..2463d2fea21f 100644
--- a/arch/loongarch/kernel/stacktrace.c
+++ b/arch/loongarch/kernel/stacktrace.c
@@ -30,7 +30,7 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
regs->regs[1] = 0;
for (unwind_start(&state, task, regs);
- !unwind_done(&state); unwind_next_frame(&state)) {
+ !unwind_done(&state) && !unwind_error(&state); unwind_next_frame(&state)) {
addr = unwind_get_return_address(&state);
if (!addr || !consume_entry(cookie, addr))
break;
diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c
index 4351f69d9950..f377e50f3c66 100644
--- a/arch/loongarch/kernel/time.c
+++ b/arch/loongarch/kernel/time.c
@@ -133,7 +133,7 @@ static int get_timer_irq(void)
struct irq_domain *d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY);
if (d)
- return irq_create_mapping(d, EXCCODE_TIMER - EXCCODE_INT_START);
+ return irq_create_mapping(d, INT_TI);
return -EINVAL;
}
diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
index de8ebe20b666..8db26e4ca447 100644
--- a/arch/loongarch/kernel/traps.c
+++ b/arch/loongarch/kernel/traps.c
@@ -3,6 +3,7 @@
* Author: Huacai Chen <chenhuacai@loongson.cn>
* Copyright (C) 2020-2022 Loongson Technology Corporation Limited
*/
+#include <linux/bitfield.h>
#include <linux/bitops.h>
#include <linux/bug.h>
#include <linux/compiler.h>
@@ -35,6 +36,7 @@
#include <asm/break.h>
#include <asm/cpu.h>
#include <asm/fpu.h>
+#include <asm/inst.h>
#include <asm/loongarch.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
@@ -50,6 +52,7 @@
extern asmlinkage void handle_ade(void);
extern asmlinkage void handle_ale(void);
+extern asmlinkage void handle_bce(void);
extern asmlinkage void handle_sys(void);
extern asmlinkage void handle_bp(void);
extern asmlinkage void handle_ri(void);
@@ -153,53 +156,210 @@ static void show_code(unsigned int *pc, bool user)
pr_cont("\n");
}
-static void __show_regs(const struct pt_regs *regs)
+static void print_bool_fragment(const char *key, unsigned long val, bool first)
{
- const int field = 2 * sizeof(unsigned long);
- unsigned int excsubcode;
- unsigned int exccode;
- int i;
+ /* e.g. "+PG", "-DA" */
+ pr_cont("%s%c%s", first ? "" : " ", val ? '+' : '-', key);
+}
- show_regs_print_info(KERN_DEFAULT);
+static void print_plv_fragment(const char *key, int val)
+{
+ /* e.g. "PLV0", "PPLV3" */
+ pr_cont("%s%d", key, val);
+}
- /*
- * Saved main processor registers
- */
- for (i = 0; i < 32; ) {
- if ((i % 4) == 0)
- printk("$%2d :", i);
- pr_cont(" %0*lx", field, regs->regs[i]);
+static void print_memory_type_fragment(const char *key, unsigned long val)
+{
+ const char *humanized_type;
- i++;
- if ((i % 4) == 0)
- pr_cont("\n");
+ switch (val) {
+ case 0:
+ humanized_type = "SUC";
+ break;
+ case 1:
+ humanized_type = "CC";
+ break;
+ case 2:
+ humanized_type = "WUC";
+ break;
+ default:
+ pr_cont(" %s=Reserved(%lu)", key, val);
+ return;
}
+ /* e.g. " DATM=WUC" */
+ pr_cont(" %s=%s", key, humanized_type);
+}
+
+static void print_intr_fragment(const char *key, unsigned long val)
+{
+ /* e.g. "LIE=0-1,3,5-7" */
+ pr_cont("%s=%*pbl", key, EXCCODE_INT_NUM, &val);
+}
+
+static void print_crmd(unsigned long x)
+{
+ printk(" CRMD: %08lx (", x);
+ print_plv_fragment("PLV", (int) FIELD_GET(CSR_CRMD_PLV, x));
+ print_bool_fragment("IE", FIELD_GET(CSR_CRMD_IE, x), false);
+ print_bool_fragment("DA", FIELD_GET(CSR_CRMD_DA, x), false);
+ print_bool_fragment("PG", FIELD_GET(CSR_CRMD_PG, x), false);
+ print_memory_type_fragment("DACF", FIELD_GET(CSR_CRMD_DACF, x));
+ print_memory_type_fragment("DACM", FIELD_GET(CSR_CRMD_DACM, x));
+ print_bool_fragment("WE", FIELD_GET(CSR_CRMD_WE, x), false);
+ pr_cont(")\n");
+}
+
+static void print_prmd(unsigned long x)
+{
+ printk(" PRMD: %08lx (", x);
+ print_plv_fragment("PPLV", (int) FIELD_GET(CSR_PRMD_PPLV, x));
+ print_bool_fragment("PIE", FIELD_GET(CSR_PRMD_PIE, x), false);
+ print_bool_fragment("PWE", FIELD_GET(CSR_PRMD_PWE, x), false);
+ pr_cont(")\n");
+}
+
+static void print_euen(unsigned long x)
+{
+ printk(" EUEN: %08lx (", x);
+ print_bool_fragment("FPE", FIELD_GET(CSR_EUEN_FPEN, x), true);
+ print_bool_fragment("SXE", FIELD_GET(CSR_EUEN_LSXEN, x), false);
+ print_bool_fragment("ASXE", FIELD_GET(CSR_EUEN_LASXEN, x), false);
+ print_bool_fragment("BTE", FIELD_GET(CSR_EUEN_LBTEN, x), false);
+ pr_cont(")\n");
+}
+
+static void print_ecfg(unsigned long x)
+{
+ printk(" ECFG: %08lx (", x);
+ print_intr_fragment("LIE", FIELD_GET(CSR_ECFG_IM, x));
+ pr_cont(" VS=%d)\n", (int) FIELD_GET(CSR_ECFG_VS, x));
+}
+
+static const char *humanize_exc_name(unsigned int ecode, unsigned int esubcode)
+{
+ /*
+ * LoongArch users and developers are probably more familiar with
+ * those names found in the ISA manual, so we are going to print out
+ * the latter. This will require some mapping.
+ */
+ switch (ecode) {
+ case EXCCODE_RSV: return "INT";
+ case EXCCODE_TLBL: return "PIL";
+ case EXCCODE_TLBS: return "PIS";
+ case EXCCODE_TLBI: return "PIF";
+ case EXCCODE_TLBM: return "PME";
+ case EXCCODE_TLBNR: return "PNR";
+ case EXCCODE_TLBNX: return "PNX";
+ case EXCCODE_TLBPE: return "PPI";
+ case EXCCODE_ADE:
+ switch (esubcode) {
+ case EXSUBCODE_ADEF: return "ADEF";
+ case EXSUBCODE_ADEM: return "ADEM";
+ }
+ break;
+ case EXCCODE_ALE: return "ALE";
+ case EXCCODE_BCE: return "BCE";
+ case EXCCODE_SYS: return "SYS";
+ case EXCCODE_BP: return "BRK";
+ case EXCCODE_INE: return "INE";
+ case EXCCODE_IPE: return "IPE";
+ case EXCCODE_FPDIS: return "FPD";
+ case EXCCODE_LSXDIS: return "SXD";
+ case EXCCODE_LASXDIS: return "ASXD";
+ case EXCCODE_FPE:
+ switch (esubcode) {
+ case EXCSUBCODE_FPE: return "FPE";
+ case EXCSUBCODE_VFPE: return "VFPE";
+ }
+ break;
+ case EXCCODE_WATCH:
+ switch (esubcode) {
+ case EXCSUBCODE_WPEF: return "WPEF";
+ case EXCSUBCODE_WPEM: return "WPEM";
+ }
+ break;
+ case EXCCODE_BTDIS: return "BTD";
+ case EXCCODE_BTE: return "BTE";
+ case EXCCODE_GSPR: return "GSPR";
+ case EXCCODE_HVC: return "HVC";
+ case EXCCODE_GCM:
+ switch (esubcode) {
+ case EXCSUBCODE_GCSC: return "GCSC";
+ case EXCSUBCODE_GCHC: return "GCHC";
+ }
+ break;
/*
- * Saved csr registers
+ * The manual did not mention the EXCCODE_SE case, but print out it
+ * nevertheless.
*/
- printk("era : %0*lx %pS\n", field, regs->csr_era,
- (void *) regs->csr_era);
- printk("ra : %0*lx %pS\n", field, regs->regs[1],
- (void *) regs->regs[1]);
+ case EXCCODE_SE: return "SE";
+ }
- printk("CSR crmd: %08lx ", regs->csr_crmd);
- printk("CSR prmd: %08lx ", regs->csr_prmd);
- printk("CSR euen: %08lx ", regs->csr_euen);
- printk("CSR ecfg: %08lx ", regs->csr_ecfg);
- printk("CSR estat: %08lx ", regs->csr_estat);
+ return "???";
+}
- pr_cont("\n");
+static void print_estat(unsigned long x)
+{
+ unsigned int ecode = FIELD_GET(CSR_ESTAT_EXC, x);
+ unsigned int esubcode = FIELD_GET(CSR_ESTAT_ESUBCODE, x);
+
+ printk("ESTAT: %08lx [%s] (", x, humanize_exc_name(ecode, esubcode));
+ print_intr_fragment("IS", FIELD_GET(CSR_ESTAT_IS, x));
+ pr_cont(" ECode=%d EsubCode=%d)\n", (int) ecode, (int) esubcode);
+}
+
+static void __show_regs(const struct pt_regs *regs)
+{
+ const int field = 2 * sizeof(unsigned long);
+ unsigned int exccode = FIELD_GET(CSR_ESTAT_EXC, regs->csr_estat);
+
+ show_regs_print_info(KERN_DEFAULT);
+
+ /* Print saved GPRs except $zero (substituting with PC/ERA) */
+#define GPR_FIELD(x) field, regs->regs[x]
+ printk("pc %0*lx ra %0*lx tp %0*lx sp %0*lx\n",
+ field, regs->csr_era, GPR_FIELD(1), GPR_FIELD(2), GPR_FIELD(3));
+ printk("a0 %0*lx a1 %0*lx a2 %0*lx a3 %0*lx\n",
+ GPR_FIELD(4), GPR_FIELD(5), GPR_FIELD(6), GPR_FIELD(7));
+ printk("a4 %0*lx a5 %0*lx a6 %0*lx a7 %0*lx\n",
+ GPR_FIELD(8), GPR_FIELD(9), GPR_FIELD(10), GPR_FIELD(11));
+ printk("t0 %0*lx t1 %0*lx t2 %0*lx t3 %0*lx\n",
+ GPR_FIELD(12), GPR_FIELD(13), GPR_FIELD(14), GPR_FIELD(15));
+ printk("t4 %0*lx t5 %0*lx t6 %0*lx t7 %0*lx\n",
+ GPR_FIELD(16), GPR_FIELD(17), GPR_FIELD(18), GPR_FIELD(19));
+ printk("t8 %0*lx u0 %0*lx s9 %0*lx s0 %0*lx\n",
+ GPR_FIELD(20), GPR_FIELD(21), GPR_FIELD(22), GPR_FIELD(23));
+ printk("s1 %0*lx s2 %0*lx s3 %0*lx s4 %0*lx\n",
+ GPR_FIELD(24), GPR_FIELD(25), GPR_FIELD(26), GPR_FIELD(27));
+ printk("s5 %0*lx s6 %0*lx s7 %0*lx s8 %0*lx\n",
+ GPR_FIELD(28), GPR_FIELD(29), GPR_FIELD(30), GPR_FIELD(31));
+
+ /* The slot for $zero is reused as the syscall restart flag */
+ if (regs->regs[0])
+ printk("syscall restart flag: %0*lx\n", GPR_FIELD(0));
+
+ if (user_mode(regs)) {
+ printk(" ra: %0*lx\n", GPR_FIELD(1));
+ printk(" ERA: %0*lx\n", field, regs->csr_era);
+ } else {
+ printk(" ra: %0*lx %pS\n", GPR_FIELD(1), (void *) regs->regs[1]);
+ printk(" ERA: %0*lx %pS\n", field, regs->csr_era, (void *) regs->csr_era);
+ }
+#undef GPR_FIELD
- exccode = ((regs->csr_estat) & CSR_ESTAT_EXC) >> CSR_ESTAT_EXC_SHIFT;
- excsubcode = ((regs->csr_estat) & CSR_ESTAT_ESUBCODE) >> CSR_ESTAT_ESUBCODE_SHIFT;
- printk("ExcCode : %x (SubCode %x)\n", exccode, excsubcode);
+ /* Print saved important CSRs */
+ print_crmd(regs->csr_crmd);
+ print_prmd(regs->csr_prmd);
+ print_euen(regs->csr_euen);
+ print_ecfg(regs->csr_ecfg);
+ print_estat(regs->csr_estat);
if (exccode >= EXCCODE_TLBL && exccode <= EXCCODE_ALE)
- printk("BadVA : %0*lx\n", field, regs->csr_badvaddr);
+ printk(" BADV: %0*lx\n", field, regs->csr_badvaddr);
- printk("PrId : %08x (%s)\n", read_cpucfg(LOONGARCH_CPUCFG0),
- cpu_family_string());
+ printk(" PRID: %08x (%s, %s)\n", read_cpucfg(LOONGARCH_CPUCFG0),
+ cpu_family_string(), cpu_full_name_string());
}
void show_regs(struct pt_regs *regs)
@@ -430,6 +590,95 @@ static void bug_handler(struct pt_regs *regs)
}
}
+asmlinkage void noinstr do_bce(struct pt_regs *regs)
+{
+ bool user = user_mode(regs);
+ unsigned long era = exception_era(regs);
+ u64 badv = 0, lower = 0, upper = ULONG_MAX;
+ union loongarch_instruction insn;
+ irqentry_state_t state = irqentry_enter(regs);
+
+ if (regs->csr_prmd & CSR_PRMD_PIE)
+ local_irq_enable();
+
+ current->thread.trap_nr = read_csr_excode();
+
+ die_if_kernel("Bounds check error in kernel code", regs);
+
+ /*
+ * Pull out the address that failed bounds checking, and the lower /
+ * upper bound, by minimally looking at the faulting instruction word
+ * and reading from the correct register.
+ */
+ if (__get_inst(&insn.word, (u32 *)era, user))
+ goto bad_era;
+
+ switch (insn.reg3_format.opcode) {
+ case asrtle_op:
+ if (insn.reg3_format.rd != 0)
+ break; /* not asrtle */
+ badv = regs->regs[insn.reg3_format.rj];
+ upper = regs->regs[insn.reg3_format.rk];
+ break;
+
+ case asrtgt_op:
+ if (insn.reg3_format.rd != 0)
+ break; /* not asrtgt */
+ badv = regs->regs[insn.reg3_format.rj];
+ lower = regs->regs[insn.reg3_format.rk];
+ break;
+
+ case ldleb_op:
+ case ldleh_op:
+ case ldlew_op:
+ case ldled_op:
+ case stleb_op:
+ case stleh_op:
+ case stlew_op:
+ case stled_op:
+ case fldles_op:
+ case fldled_op:
+ case fstles_op:
+ case fstled_op:
+ badv = regs->regs[insn.reg3_format.rj];
+ upper = regs->regs[insn.reg3_format.rk];
+ break;
+
+ case ldgtb_op:
+ case ldgth_op:
+ case ldgtw_op:
+ case ldgtd_op:
+ case stgtb_op:
+ case stgth_op:
+ case stgtw_op:
+ case stgtd_op:
+ case fldgts_op:
+ case fldgtd_op:
+ case fstgts_op:
+ case fstgtd_op:
+ badv = regs->regs[insn.reg3_format.rj];
+ lower = regs->regs[insn.reg3_format.rk];
+ break;
+ }
+
+ force_sig_bnderr((void __user *)badv, (void __user *)lower, (void __user *)upper);
+
+out:
+ if (regs->csr_prmd & CSR_PRMD_PIE)
+ local_irq_disable();
+
+ irqentry_exit(regs, state);
+ return;
+
+bad_era:
+ /*
+ * Cannot pull out the instruction word, hence cannot provide more
+ * info than a regular SIGSEGV in this case.
+ */
+ force_sig(SIGSEGV);
+ goto out;
+}
+
asmlinkage void noinstr do_bp(struct pt_regs *regs)
{
bool user = user_mode(regs);
@@ -792,11 +1041,12 @@ void __init trap_init(void)
long i;
/* Set interrupt vector handler */
- for (i = EXCCODE_INT_START; i < EXCCODE_INT_END; i++)
+ for (i = EXCCODE_INT_START; i <= EXCCODE_INT_END; i++)
set_handler(i * VECSIZE, handle_vint, VECSIZE);
set_handler(EXCCODE_ADE * VECSIZE, handle_ade, VECSIZE);
set_handler(EXCCODE_ALE * VECSIZE, handle_ale, VECSIZE);
+ set_handler(EXCCODE_BCE * VECSIZE, handle_bce, VECSIZE);
set_handler(EXCCODE_SYS * VECSIZE, handle_sys, VECSIZE);
set_handler(EXCCODE_BP * VECSIZE, handle_bp, VECSIZE);
set_handler(EXCCODE_INE * VECSIZE, handle_ri, VECSIZE);
diff --git a/arch/loongarch/kernel/unwind.c b/arch/loongarch/kernel/unwind.c
index a463d6961344..ba324ba76fa1 100644
--- a/arch/loongarch/kernel/unwind.c
+++ b/arch/loongarch/kernel/unwind.c
@@ -28,5 +28,6 @@ bool default_next_frame(struct unwind_state *state)
} while (!get_stack_info(state->sp, state->task, info));
+ state->error = true;
return false;
}
diff --git a/arch/loongarch/kernel/unwind_prologue.c b/arch/loongarch/kernel/unwind_prologue.c
index 9095fde8e55d..55afc27320e1 100644
--- a/arch/loongarch/kernel/unwind_prologue.c
+++ b/arch/loongarch/kernel/unwind_prologue.c
@@ -211,7 +211,7 @@ static bool next_frame(struct unwind_state *state)
pc = regs->csr_era;
if (user_mode(regs) || !__kernel_text_address(pc))
- return false;
+ goto out;
state->first = true;
state->pc = pc;
@@ -226,6 +226,8 @@ static bool next_frame(struct unwind_state *state)
} while (!get_stack_info(state->sp, state->task, info));
+out:
+ state->error = true;
return false;
}
diff --git a/arch/loongarch/lib/Makefile b/arch/loongarch/lib/Makefile
index 40bde632900f..d60d4e096cfa 100644
--- a/arch/loongarch/lib/Makefile
+++ b/arch/loongarch/lib/Makefile
@@ -4,4 +4,6 @@
#
lib-y += delay.o memset.o memcpy.o memmove.o \
- clear_user.o copy_user.o dump_tlb.o unaligned.o
+ clear_user.o copy_user.o csum.o dump_tlb.o unaligned.o
+
+obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
diff --git a/arch/loongarch/lib/clear_user.S b/arch/loongarch/lib/clear_user.S
index 2dc48e61a2c8..fd1d62b244f2 100644
--- a/arch/loongarch/lib/clear_user.S
+++ b/arch/loongarch/lib/clear_user.S
@@ -13,7 +13,14 @@
.irp to, 0, 1, 2, 3, 4, 5, 6, 7
.L_fixup_handle_\to\():
- addi.d a0, a1, (\to) * (-8)
+ sub.d a0, a2, a0
+ addi.d a0, a0, (\to) * (-8)
+ jr ra
+.endr
+
+.irp to, 0, 2, 4
+.L_fixup_handle_s\to\():
+ addi.d a0, a1, -\to
jr ra
.endr
@@ -44,7 +51,7 @@ SYM_FUNC_START(__clear_user_generic)
2: move a0, a1
jr ra
- _asm_extable 1b, .L_fixup_handle_0
+ _asm_extable 1b, .L_fixup_handle_s0
SYM_FUNC_END(__clear_user_generic)
/*
@@ -54,12 +61,21 @@ SYM_FUNC_END(__clear_user_generic)
* a1: size
*/
SYM_FUNC_START(__clear_user_fast)
- beqz a1, 10f
+ sltui t0, a1, 9
+ bnez t0, .Lsmall
- ori a2, zero, 64
- blt a1, a2, 9f
+ add.d a2, a0, a1
+0: st.d zero, a0, 0
+
+ /* align up address */
+ addi.d a0, a0, 8
+ bstrins.d a0, zero, 2, 0
+
+ addi.d a3, a2, -64
+ bgeu a0, a3, .Llt64
/* set 64 bytes at a time */
+.Lloop64:
1: st.d zero, a0, 0
2: st.d zero, a0, 8
3: st.d zero, a0, 16
@@ -68,24 +84,95 @@ SYM_FUNC_START(__clear_user_fast)
6: st.d zero, a0, 40
7: st.d zero, a0, 48
8: st.d zero, a0, 56
-
addi.d a0, a0, 64
- addi.d a1, a1, -64
- bge a1, a2, 1b
-
- beqz a1, 10f
+ bltu a0, a3, .Lloop64
/* set the remaining bytes */
-9: st.b zero, a0, 0
- addi.d a0, a0, 1
- addi.d a1, a1, -1
- bgt a1, zero, 9b
+.Llt64:
+ addi.d a3, a2, -32
+ bgeu a0, a3, .Llt32
+9: st.d zero, a0, 0
+10: st.d zero, a0, 8
+11: st.d zero, a0, 16
+12: st.d zero, a0, 24
+ addi.d a0, a0, 32
+
+.Llt32:
+ addi.d a3, a2, -16
+ bgeu a0, a3, .Llt16
+13: st.d zero, a0, 0
+14: st.d zero, a0, 8
+ addi.d a0, a0, 16
+
+.Llt16:
+ addi.d a3, a2, -8
+ bgeu a0, a3, .Llt8
+15: st.d zero, a0, 0
+
+.Llt8:
+16: st.d zero, a2, -8
/* return */
-10: move a0, a1
+ move a0, zero
+ jr ra
+
+ .align 4
+.Lsmall:
+ pcaddi t0, 4
+ slli.d a2, a1, 4
+ add.d t0, t0, a2
+ jr t0
+
+ .align 4
+ move a0, zero
+ jr ra
+
+ .align 4
+17: st.b zero, a0, 0
+ move a0, zero
+ jr ra
+
+ .align 4
+18: st.h zero, a0, 0
+ move a0, zero
+ jr ra
+
+ .align 4
+19: st.h zero, a0, 0
+20: st.b zero, a0, 2
+ move a0, zero
+ jr ra
+
+ .align 4
+21: st.w zero, a0, 0
+ move a0, zero
+ jr ra
+
+ .align 4
+22: st.w zero, a0, 0
+23: st.b zero, a0, 4
+ move a0, zero
+ jr ra
+
+ .align 4
+24: st.w zero, a0, 0
+25: st.h zero, a0, 4
+ move a0, zero
+ jr ra
+
+ .align 4
+26: st.w zero, a0, 0
+27: st.w zero, a0, 3
+ move a0, zero
+ jr ra
+
+ .align 4
+28: st.d zero, a0, 0
+ move a0, zero
jr ra
/* fixup and ex_table */
+ _asm_extable 0b, .L_fixup_handle_0
_asm_extable 1b, .L_fixup_handle_0
_asm_extable 2b, .L_fixup_handle_1
_asm_extable 3b, .L_fixup_handle_2
@@ -95,4 +182,23 @@ SYM_FUNC_START(__clear_user_fast)
_asm_extable 7b, .L_fixup_handle_6
_asm_extable 8b, .L_fixup_handle_7
_asm_extable 9b, .L_fixup_handle_0
+ _asm_extable 10b, .L_fixup_handle_1
+ _asm_extable 11b, .L_fixup_handle_2
+ _asm_extable 12b, .L_fixup_handle_3
+ _asm_extable 13b, .L_fixup_handle_0
+ _asm_extable 14b, .L_fixup_handle_1
+ _asm_extable 15b, .L_fixup_handle_0
+ _asm_extable 16b, .L_fixup_handle_1
+ _asm_extable 17b, .L_fixup_handle_s0
+ _asm_extable 18b, .L_fixup_handle_s0
+ _asm_extable 19b, .L_fixup_handle_s0
+ _asm_extable 20b, .L_fixup_handle_s2
+ _asm_extable 21b, .L_fixup_handle_s0
+ _asm_extable 22b, .L_fixup_handle_s0
+ _asm_extable 23b, .L_fixup_handle_s4
+ _asm_extable 24b, .L_fixup_handle_s0
+ _asm_extable 25b, .L_fixup_handle_s4
+ _asm_extable 26b, .L_fixup_handle_s0
+ _asm_extable 27b, .L_fixup_handle_s4
+ _asm_extable 28b, .L_fixup_handle_s0
SYM_FUNC_END(__clear_user_fast)
diff --git a/arch/loongarch/lib/copy_user.S b/arch/loongarch/lib/copy_user.S
index 55ac6020a1ad..b21f6d5d38f5 100644
--- a/arch/loongarch/lib/copy_user.S
+++ b/arch/loongarch/lib/copy_user.S
@@ -13,7 +13,14 @@
.irp to, 0, 1, 2, 3, 4, 5, 6, 7
.L_fixup_handle_\to\():
- addi.d a0, a2, (\to) * (-8)
+ sub.d a0, a2, a0
+ addi.d a0, a0, (\to) * (-8)
+ jr ra
+.endr
+
+.irp to, 0, 2, 4
+.L_fixup_handle_s\to\():
+ addi.d a0, a2, -\to
jr ra
.endr
@@ -47,8 +54,8 @@ SYM_FUNC_START(__copy_user_generic)
3: move a0, a2
jr ra
- _asm_extable 1b, .L_fixup_handle_0
- _asm_extable 2b, .L_fixup_handle_0
+ _asm_extable 1b, .L_fixup_handle_s0
+ _asm_extable 2b, .L_fixup_handle_s0
SYM_FUNC_END(__copy_user_generic)
/*
@@ -59,65 +66,209 @@ SYM_FUNC_END(__copy_user_generic)
* a2: n
*/
SYM_FUNC_START(__copy_user_fast)
- beqz a2, 19f
+ sltui t0, a2, 9
+ bnez t0, .Lsmall
- ori a3, zero, 64
- blt a2, a3, 17f
+ add.d a3, a1, a2
+ add.d a2, a0, a2
+0: ld.d t0, a1, 0
+1: st.d t0, a0, 0
- /* copy 64 bytes at a time */
-1: ld.d t0, a1, 0
-2: ld.d t1, a1, 8
-3: ld.d t2, a1, 16
-4: ld.d t3, a1, 24
-5: ld.d t4, a1, 32
-6: ld.d t5, a1, 40
-7: ld.d t6, a1, 48
-8: ld.d t7, a1, 56
-9: st.d t0, a0, 0
-10: st.d t1, a0, 8
-11: st.d t2, a0, 16
-12: st.d t3, a0, 24
-13: st.d t4, a0, 32
-14: st.d t5, a0, 40
-15: st.d t6, a0, 48
-16: st.d t7, a0, 56
+ /* align up destination address */
+ andi t1, a0, 7
+ sub.d t0, zero, t1
+ addi.d t0, t0, 8
+ add.d a1, a1, t0
+ add.d a0, a0, t0
- addi.d a0, a0, 64
- addi.d a1, a1, 64
- addi.d a2, a2, -64
- bge a2, a3, 1b
+ addi.d a4, a3, -64
+ bgeu a1, a4, .Llt64
- beqz a2, 19f
+ /* copy 64 bytes at a time */
+.Lloop64:
+2: ld.d t0, a1, 0
+3: ld.d t1, a1, 8
+4: ld.d t2, a1, 16
+5: ld.d t3, a1, 24
+6: ld.d t4, a1, 32
+7: ld.d t5, a1, 40
+8: ld.d t6, a1, 48
+9: ld.d t7, a1, 56
+ addi.d a1, a1, 64
+10: st.d t0, a0, 0
+11: st.d t1, a0, 8
+12: st.d t2, a0, 16
+13: st.d t3, a0, 24
+14: st.d t4, a0, 32
+15: st.d t5, a0, 40
+16: st.d t6, a0, 48
+17: st.d t7, a0, 56
+ addi.d a0, a0, 64
+ bltu a1, a4, .Lloop64
/* copy the remaining bytes */
-17: ld.b t0, a1, 0
-18: st.b t0, a0, 0
- addi.d a0, a0, 1
- addi.d a1, a1, 1
- addi.d a2, a2, -1
- bgt a2, zero, 17b
+.Llt64:
+ addi.d a4, a3, -32
+ bgeu a1, a4, .Llt32
+18: ld.d t0, a1, 0
+19: ld.d t1, a1, 8
+20: ld.d t2, a1, 16
+21: ld.d t3, a1, 24
+ addi.d a1, a1, 32
+22: st.d t0, a0, 0
+23: st.d t1, a0, 8
+24: st.d t2, a0, 16
+25: st.d t3, a0, 24
+ addi.d a0, a0, 32
+
+.Llt32:
+ addi.d a4, a3, -16
+ bgeu a1, a4, .Llt16
+26: ld.d t0, a1, 0
+27: ld.d t1, a1, 8
+ addi.d a1, a1, 16
+28: st.d t0, a0, 0
+29: st.d t1, a0, 8
+ addi.d a0, a0, 16
+
+.Llt16:
+ addi.d a4, a3, -8
+ bgeu a1, a4, .Llt8
+30: ld.d t0, a1, 0
+31: st.d t0, a0, 0
+
+.Llt8:
+32: ld.d t0, a3, -8
+33: st.d t0, a2, -8
/* return */
-19: move a0, a2
+ move a0, zero
+ jr ra
+
+ .align 5
+.Lsmall:
+ pcaddi t0, 8
+ slli.d a3, a2, 5
+ add.d t0, t0, a3
+ jr t0
+
+ .align 5
+ move a0, zero
+ jr ra
+
+ .align 5
+34: ld.b t0, a1, 0
+35: st.b t0, a0, 0
+ move a0, zero
+ jr ra
+
+ .align 5
+36: ld.h t0, a1, 0
+37: st.h t0, a0, 0
+ move a0, zero
+ jr ra
+
+ .align 5
+38: ld.h t0, a1, 0
+39: ld.b t1, a1, 2
+40: st.h t0, a0, 0
+41: st.b t1, a0, 2
+ move a0, zero
+ jr ra
+
+ .align 5
+42: ld.w t0, a1, 0
+43: st.w t0, a0, 0
+ move a0, zero
+ jr ra
+
+ .align 5
+44: ld.w t0, a1, 0
+45: ld.b t1, a1, 4
+46: st.w t0, a0, 0
+47: st.b t1, a0, 4
+ move a0, zero
+ jr ra
+
+ .align 5
+48: ld.w t0, a1, 0
+49: ld.h t1, a1, 4
+50: st.w t0, a0, 0
+51: st.h t1, a0, 4
+ move a0, zero
+ jr ra
+
+ .align 5
+52: ld.w t0, a1, 0
+53: ld.w t1, a1, 3
+54: st.w t0, a0, 0
+55: st.w t1, a0, 3
+ move a0, zero
+ jr ra
+
+ .align 5
+56: ld.d t0, a1, 0
+57: st.d t0, a0, 0
+ move a0, zero
jr ra
/* fixup and ex_table */
+ _asm_extable 0b, .L_fixup_handle_0
_asm_extable 1b, .L_fixup_handle_0
- _asm_extable 2b, .L_fixup_handle_1
- _asm_extable 3b, .L_fixup_handle_2
- _asm_extable 4b, .L_fixup_handle_3
- _asm_extable 5b, .L_fixup_handle_4
- _asm_extable 6b, .L_fixup_handle_5
- _asm_extable 7b, .L_fixup_handle_6
- _asm_extable 8b, .L_fixup_handle_7
+ _asm_extable 2b, .L_fixup_handle_0
+ _asm_extable 3b, .L_fixup_handle_0
+ _asm_extable 4b, .L_fixup_handle_0
+ _asm_extable 5b, .L_fixup_handle_0
+ _asm_extable 6b, .L_fixup_handle_0
+ _asm_extable 7b, .L_fixup_handle_0
+ _asm_extable 8b, .L_fixup_handle_0
_asm_extable 9b, .L_fixup_handle_0
- _asm_extable 10b, .L_fixup_handle_1
- _asm_extable 11b, .L_fixup_handle_2
- _asm_extable 12b, .L_fixup_handle_3
- _asm_extable 13b, .L_fixup_handle_4
- _asm_extable 14b, .L_fixup_handle_5
- _asm_extable 15b, .L_fixup_handle_6
- _asm_extable 16b, .L_fixup_handle_7
- _asm_extable 17b, .L_fixup_handle_0
+ _asm_extable 10b, .L_fixup_handle_0
+ _asm_extable 11b, .L_fixup_handle_1
+ _asm_extable 12b, .L_fixup_handle_2
+ _asm_extable 13b, .L_fixup_handle_3
+ _asm_extable 14b, .L_fixup_handle_4
+ _asm_extable 15b, .L_fixup_handle_5
+ _asm_extable 16b, .L_fixup_handle_6
+ _asm_extable 17b, .L_fixup_handle_7
_asm_extable 18b, .L_fixup_handle_0
+ _asm_extable 19b, .L_fixup_handle_0
+ _asm_extable 20b, .L_fixup_handle_0
+ _asm_extable 21b, .L_fixup_handle_0
+ _asm_extable 22b, .L_fixup_handle_0
+ _asm_extable 23b, .L_fixup_handle_1
+ _asm_extable 24b, .L_fixup_handle_2
+ _asm_extable 25b, .L_fixup_handle_3
+ _asm_extable 26b, .L_fixup_handle_0
+ _asm_extable 27b, .L_fixup_handle_0
+ _asm_extable 28b, .L_fixup_handle_0
+ _asm_extable 29b, .L_fixup_handle_1
+ _asm_extable 30b, .L_fixup_handle_0
+ _asm_extable 31b, .L_fixup_handle_0
+ _asm_extable 32b, .L_fixup_handle_0
+ _asm_extable 33b, .L_fixup_handle_1
+ _asm_extable 34b, .L_fixup_handle_s0
+ _asm_extable 35b, .L_fixup_handle_s0
+ _asm_extable 36b, .L_fixup_handle_s0
+ _asm_extable 37b, .L_fixup_handle_s0
+ _asm_extable 38b, .L_fixup_handle_s0
+ _asm_extable 39b, .L_fixup_handle_s0
+ _asm_extable 40b, .L_fixup_handle_s0
+ _asm_extable 41b, .L_fixup_handle_s2
+ _asm_extable 42b, .L_fixup_handle_s0
+ _asm_extable 43b, .L_fixup_handle_s0
+ _asm_extable 44b, .L_fixup_handle_s0
+ _asm_extable 45b, .L_fixup_handle_s0
+ _asm_extable 46b, .L_fixup_handle_s0
+ _asm_extable 47b, .L_fixup_handle_s4
+ _asm_extable 48b, .L_fixup_handle_s0
+ _asm_extable 49b, .L_fixup_handle_s0
+ _asm_extable 50b, .L_fixup_handle_s0
+ _asm_extable 51b, .L_fixup_handle_s4
+ _asm_extable 52b, .L_fixup_handle_s0
+ _asm_extable 53b, .L_fixup_handle_s0
+ _asm_extable 54b, .L_fixup_handle_s0
+ _asm_extable 55b, .L_fixup_handle_s4
+ _asm_extable 56b, .L_fixup_handle_s0
+ _asm_extable 57b, .L_fixup_handle_s0
SYM_FUNC_END(__copy_user_fast)
diff --git a/arch/loongarch/lib/csum.c b/arch/loongarch/lib/csum.c
new file mode 100644
index 000000000000..a5e84b403c3b
--- /dev/null
+++ b/arch/loongarch/lib/csum.c
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2019-2020 Arm Ltd.
+
+#include <linux/compiler.h>
+#include <linux/kasan-checks.h>
+#include <linux/kernel.h>
+
+#include <net/checksum.h>
+
+static u64 accumulate(u64 sum, u64 data)
+{
+ sum += data;
+ if (sum < data)
+ sum += 1;
+ return sum;
+}
+
+/*
+ * We over-read the buffer and this makes KASAN unhappy. Instead, disable
+ * instrumentation and call kasan explicitly.
+ */
+unsigned int __no_sanitize_address do_csum(const unsigned char *buff, int len)
+{
+ unsigned int offset, shift, sum;
+ const u64 *ptr;
+ u64 data, sum64 = 0;
+
+ if (unlikely(len == 0))
+ return 0;
+
+ offset = (unsigned long)buff & 7;
+ /*
+ * This is to all intents and purposes safe, since rounding down cannot
+ * result in a different page or cache line being accessed, and @buff
+ * should absolutely not be pointing to anything read-sensitive. We do,
+ * however, have to be careful not to piss off KASAN, which means using
+ * unchecked reads to accommodate the head and tail, for which we'll
+ * compensate with an explicit check up-front.
+ */
+ kasan_check_read(buff, len);
+ ptr = (u64 *)(buff - offset);
+ len = len + offset - 8;
+
+ /*
+ * Head: zero out any excess leading bytes. Shifting back by the same
+ * amount should be at least as fast as any other way of handling the
+ * odd/even alignment, and means we can ignore it until the very end.
+ */
+ shift = offset * 8;
+ data = *ptr++;
+ data = (data >> shift) << shift;
+
+ /*
+ * Body: straightforward aligned loads from here on (the paired loads
+ * underlying the quadword type still only need dword alignment). The
+ * main loop strictly excludes the tail, so the second loop will always
+ * run at least once.
+ */
+ while (unlikely(len > 64)) {
+ __uint128_t tmp1, tmp2, tmp3, tmp4;
+
+ tmp1 = *(__uint128_t *)ptr;
+ tmp2 = *(__uint128_t *)(ptr + 2);
+ tmp3 = *(__uint128_t *)(ptr + 4);
+ tmp4 = *(__uint128_t *)(ptr + 6);
+
+ len -= 64;
+ ptr += 8;
+
+ /* This is the "don't dump the carry flag into a GPR" idiom */
+ tmp1 += (tmp1 >> 64) | (tmp1 << 64);
+ tmp2 += (tmp2 >> 64) | (tmp2 << 64);
+ tmp3 += (tmp3 >> 64) | (tmp3 << 64);
+ tmp4 += (tmp4 >> 64) | (tmp4 << 64);
+ tmp1 = ((tmp1 >> 64) << 64) | (tmp2 >> 64);
+ tmp1 += (tmp1 >> 64) | (tmp1 << 64);
+ tmp3 = ((tmp3 >> 64) << 64) | (tmp4 >> 64);
+ tmp3 += (tmp3 >> 64) | (tmp3 << 64);
+ tmp1 = ((tmp1 >> 64) << 64) | (tmp3 >> 64);
+ tmp1 += (tmp1 >> 64) | (tmp1 << 64);
+ tmp1 = ((tmp1 >> 64) << 64) | sum64;
+ tmp1 += (tmp1 >> 64) | (tmp1 << 64);
+ sum64 = tmp1 >> 64;
+ }
+ while (len > 8) {
+ __uint128_t tmp;
+
+ sum64 = accumulate(sum64, data);
+ tmp = *(__uint128_t *)ptr;
+
+ len -= 16;
+ ptr += 2;
+
+ data = tmp >> 64;
+ sum64 = accumulate(sum64, tmp);
+ }
+ if (len > 0) {
+ sum64 = accumulate(sum64, data);
+ data = *ptr;
+ len -= 8;
+ }
+ /*
+ * Tail: zero any over-read bytes similarly to the head, again
+ * preserving odd/even alignment.
+ */
+ shift = len * -8;
+ data = (data << shift) >> shift;
+ sum64 = accumulate(sum64, data);
+
+ /* Finally, folding */
+ sum64 += (sum64 >> 32) | (sum64 << 32);
+ sum = sum64 >> 32;
+ sum += (sum >> 16) | (sum << 16);
+ if (offset & 1)
+ return (u16)swab32(sum);
+
+ return sum >> 16;
+}
+
+__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+ const struct in6_addr *daddr,
+ __u32 len, __u8 proto, __wsum csum)
+{
+ __uint128_t src, dst;
+ u64 sum = (__force u64)csum;
+
+ src = *(const __uint128_t *)saddr->s6_addr;
+ dst = *(const __uint128_t *)daddr->s6_addr;
+
+ sum += (__force u32)htonl(len);
+ sum += (u32)proto << 24;
+ src += (src >> 64) | (src << 64);
+ dst += (dst >> 64) | (dst << 64);
+
+ sum = accumulate(sum, src >> 64);
+ sum = accumulate(sum, dst >> 64);
+
+ sum += ((sum >> 32) | (sum << 32));
+ return csum_fold((__force __wsum)(sum >> 32));
+}
+EXPORT_SYMBOL(csum_ipv6_magic);
diff --git a/arch/loongarch/lib/error-inject.c b/arch/loongarch/lib/error-inject.c
new file mode 100644
index 000000000000..afc9e1c7c973
--- /dev/null
+++ b/arch/loongarch/lib/error-inject.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/error-injection.h>
+#include <linux/kprobes.h>
+
+void override_function_with_return(struct pt_regs *regs)
+{
+ instruction_pointer_set(regs, regs->regs[1]);
+}
+NOKPROBE_SYMBOL(override_function_with_return);
diff --git a/arch/loongarch/lib/memcpy.S b/arch/loongarch/lib/memcpy.S
index 3b7e1dec7109..39ce6621c704 100644
--- a/arch/loongarch/lib/memcpy.S
+++ b/arch/loongarch/lib/memcpy.S
@@ -44,6 +44,66 @@ SYM_FUNC_START(__memcpy_generic)
SYM_FUNC_END(__memcpy_generic)
_ASM_NOKPROBE(__memcpy_generic)
+ .align 5
+SYM_FUNC_START_NOALIGN(__memcpy_small)
+ pcaddi t0, 8
+ slli.d a2, a2, 5
+ add.d t0, t0, a2
+ jr t0
+
+ .align 5
+0: jr ra
+
+ .align 5
+1: ld.b t0, a1, 0
+ st.b t0, a0, 0
+ jr ra
+
+ .align 5
+2: ld.h t0, a1, 0
+ st.h t0, a0, 0
+ jr ra
+
+ .align 5
+3: ld.h t0, a1, 0
+ ld.b t1, a1, 2
+ st.h t0, a0, 0
+ st.b t1, a0, 2
+ jr ra
+
+ .align 5
+4: ld.w t0, a1, 0
+ st.w t0, a0, 0
+ jr ra
+
+ .align 5
+5: ld.w t0, a1, 0
+ ld.b t1, a1, 4
+ st.w t0, a0, 0
+ st.b t1, a0, 4
+ jr ra
+
+ .align 5
+6: ld.w t0, a1, 0
+ ld.h t1, a1, 4
+ st.w t0, a0, 0
+ st.h t1, a0, 4
+ jr ra
+
+ .align 5
+7: ld.w t0, a1, 0
+ ld.w t1, a1, 3
+ st.w t0, a0, 0
+ st.w t1, a0, 3
+ jr ra
+
+ .align 5
+8: ld.d t0, a1, 0
+ st.d t0, a0, 0
+ jr ra
+SYM_FUNC_END(__memcpy_small)
+_ASM_NOKPROBE(__memcpy_small)
+
/*
* void *__memcpy_fast(void *dst, const void *src, size_t n)
*
@@ -52,14 +112,27 @@ _ASM_NOKPROBE(__memcpy_generic)
* a2: n
*/
SYM_FUNC_START(__memcpy_fast)
- move a3, a0
- beqz a2, 3f
+ sltui t0, a2, 9
+ bnez t0, __memcpy_small
+
+ add.d a3, a1, a2
+ add.d a2, a0, a2
+ ld.d a6, a1, 0
+ ld.d a7, a3, -8
+
+ /* align up destination address */
+ andi t1, a0, 7
+ sub.d t0, zero, t1
+ addi.d t0, t0, 8
+ add.d a1, a1, t0
+ add.d a5, a0, t0
- ori a4, zero, 64
- blt a2, a4, 2f
+ addi.d a4, a3, -64
+ bgeu a1, a4, .Llt64
/* copy 64 bytes at a time */
-1: ld.d t0, a1, 0
+.Lloop64:
+ ld.d t0, a1, 0
ld.d t1, a1, 8
ld.d t2, a1, 16
ld.d t3, a1, 24
@@ -67,32 +140,54 @@ SYM_FUNC_START(__memcpy_fast)
ld.d t5, a1, 40
ld.d t6, a1, 48
ld.d t7, a1, 56
- st.d t0, a0, 0
- st.d t1, a0, 8
- st.d t2, a0, 16
- st.d t3, a0, 24
- st.d t4, a0, 32
- st.d t5, a0, 40
- st.d t6, a0, 48
- st.d t7, a0, 56
-
- addi.d a0, a0, 64
addi.d a1, a1, 64
- addi.d a2, a2, -64
- bge a2, a4, 1b
-
- beqz a2, 3f
+ st.d t0, a5, 0
+ st.d t1, a5, 8
+ st.d t2, a5, 16
+ st.d t3, a5, 24
+ st.d t4, a5, 32
+ st.d t5, a5, 40
+ st.d t6, a5, 48
+ st.d t7, a5, 56
+ addi.d a5, a5, 64
+ bltu a1, a4, .Lloop64
/* copy the remaining bytes */
-2: ld.b t0, a1, 0
- st.b t0, a0, 0
- addi.d a0, a0, 1
- addi.d a1, a1, 1
- addi.d a2, a2, -1
- bgt a2, zero, 2b
+.Llt64:
+ addi.d a4, a3, -32
+ bgeu a1, a4, .Llt32
+ ld.d t0, a1, 0
+ ld.d t1, a1, 8
+ ld.d t2, a1, 16
+ ld.d t3, a1, 24
+ addi.d a1, a1, 32
+ st.d t0, a5, 0
+ st.d t1, a5, 8
+ st.d t2, a5, 16
+ st.d t3, a5, 24
+ addi.d a5, a5, 32
+
+.Llt32:
+ addi.d a4, a3, -16
+ bgeu a1, a4, .Llt16
+ ld.d t0, a1, 0
+ ld.d t1, a1, 8
+ addi.d a1, a1, 16
+ st.d t0, a5, 0
+ st.d t1, a5, 8
+ addi.d a5, a5, 16
+
+.Llt16:
+ addi.d a4, a3, -8
+ bgeu a1, a4, .Llt8
+ ld.d t0, a1, 0
+ st.d t0, a5, 0
+
+.Llt8:
+ st.d a6, a0, 0
+ st.d a7, a2, -8
/* return */
-3: move a0, a3
jr ra
SYM_FUNC_END(__memcpy_fast)
_ASM_NOKPROBE(__memcpy_fast)
diff --git a/arch/loongarch/lib/memmove.S b/arch/loongarch/lib/memmove.S
index b796c3d6da05..45b725ba7867 100644
--- a/arch/loongarch/lib/memmove.S
+++ b/arch/loongarch/lib/memmove.S
@@ -11,23 +11,9 @@
#include <asm/regdef.h>
SYM_FUNC_START(memmove)
- blt a0, a1, 1f /* dst < src, memcpy */
- blt a1, a0, 3f /* src < dst, rmemcpy */
+ blt a0, a1, memcpy /* dst < src, memcpy */
+ blt a1, a0, rmemcpy /* src < dst, rmemcpy */
jr ra /* dst == src, return */
-
- /* if (src - dst) < 64, copy 1 byte at a time */
-1: ori a3, zero, 64
- sub.d t0, a1, a0
- blt t0, a3, 2f
- b memcpy
-2: b __memcpy_generic
-
- /* if (dst - src) < 64, copy 1 byte at a time */
-3: ori a3, zero, 64
- sub.d t0, a0, a1
- blt t0, a3, 4f
- b rmemcpy
-4: b __rmemcpy_generic
SYM_FUNC_END(memmove)
_ASM_NOKPROBE(memmove)
@@ -76,50 +62,80 @@ _ASM_NOKPROBE(__rmemcpy_generic)
* a2: n
*/
SYM_FUNC_START(__rmemcpy_fast)
- move a3, a0
- beqz a2, 3f
+ sltui t0, a2, 9
+ bnez t0, __memcpy_small
- add.d a0, a0, a2
- add.d a1, a1, a2
+ add.d a3, a1, a2
+ add.d a2, a0, a2
+ ld.d a6, a1, 0
+ ld.d a7, a3, -8
+
+ /* align up destination address */
+ andi t1, a2, 7
+ sub.d a3, a3, t1
+ sub.d a5, a2, t1
- ori a4, zero, 64
- blt a2, a4, 2f
+ addi.d a4, a1, 64
+ bgeu a4, a3, .Llt64
/* copy 64 bytes at a time */
-1: ld.d t0, a1, -8
- ld.d t1, a1, -16
- ld.d t2, a1, -24
- ld.d t3, a1, -32
- ld.d t4, a1, -40
- ld.d t5, a1, -48
- ld.d t6, a1, -56
- ld.d t7, a1, -64
- st.d t0, a0, -8
- st.d t1, a0, -16
- st.d t2, a0, -24
- st.d t3, a0, -32
- st.d t4, a0, -40
- st.d t5, a0, -48
- st.d t6, a0, -56
- st.d t7, a0, -64
-
- addi.d a0, a0, -64
- addi.d a1, a1, -64
- addi.d a2, a2, -64
- bge a2, a4, 1b
-
- beqz a2, 3f
+.Lloop64:
+ ld.d t0, a3, -8
+ ld.d t1, a3, -16
+ ld.d t2, a3, -24
+ ld.d t3, a3, -32
+ ld.d t4, a3, -40
+ ld.d t5, a3, -48
+ ld.d t6, a3, -56
+ ld.d t7, a3, -64
+ addi.d a3, a3, -64
+ st.d t0, a5, -8
+ st.d t1, a5, -16
+ st.d t2, a5, -24
+ st.d t3, a5, -32
+ st.d t4, a5, -40
+ st.d t5, a5, -48
+ st.d t6, a5, -56
+ st.d t7, a5, -64
+ addi.d a5, a5, -64
+ bltu a4, a3, .Lloop64
/* copy the remaining bytes */
-2: ld.b t0, a1, -1
- st.b t0, a0, -1
- addi.d a0, a0, -1
- addi.d a1, a1, -1
- addi.d a2, a2, -1
- bgt a2, zero, 2b
+.Llt64:
+ addi.d a4, a1, 32
+ bgeu a4, a3, .Llt32
+ ld.d t0, a3, -8
+ ld.d t1, a3, -16
+ ld.d t2, a3, -24
+ ld.d t3, a3, -32
+ addi.d a3, a3, -32
+ st.d t0, a5, -8
+ st.d t1, a5, -16
+ st.d t2, a5, -24
+ st.d t3, a5, -32
+ addi.d a5, a5, -32
+
+.Llt32:
+ addi.d a4, a1, 16
+ bgeu a4, a3, .Llt16
+ ld.d t0, a3, -8
+ ld.d t1, a3, -16
+ addi.d a3, a3, -16
+ st.d t0, a5, -8
+ st.d t1, a5, -16
+ addi.d a5, a5, -16
+
+.Llt16:
+ addi.d a4, a1, 8
+ bgeu a4, a3, .Llt8
+ ld.d t0, a3, -8
+ st.d t0, a5, -8
+
+.Llt8:
+ st.d a6, a0, 0
+ st.d a7, a2, -8
/* return */
-3: move a0, a3
jr ra
SYM_FUNC_END(__rmemcpy_fast)
_ASM_NOKPROBE(__rmemcpy_fast)
diff --git a/arch/loongarch/lib/memset.S b/arch/loongarch/lib/memset.S
index a9eb732ab2ad..b39c6194e3ae 100644
--- a/arch/loongarch/lib/memset.S
+++ b/arch/loongarch/lib/memset.S
@@ -56,39 +56,107 @@ _ASM_NOKPROBE(__memset_generic)
* a2: n
*/
SYM_FUNC_START(__memset_fast)
- move a3, a0
- beqz a2, 3f
-
- ori a4, zero, 64
- blt a2, a4, 2f
-
/* fill a1 to 64 bits */
fill_to_64 a1
- /* set 64 bytes at a time */
-1: st.d a1, a0, 0
- st.d a1, a0, 8
- st.d a1, a0, 16
- st.d a1, a0, 24
- st.d a1, a0, 32
- st.d a1, a0, 40
- st.d a1, a0, 48
- st.d a1, a0, 56
+ sltui t0, a2, 9
+ bnez t0, .Lsmall
- addi.d a0, a0, 64
- addi.d a2, a2, -64
- bge a2, a4, 1b
+ add.d a2, a0, a2
+ st.d a1, a0, 0
- beqz a2, 3f
+ /* align up address */
+ addi.d a3, a0, 8
+ bstrins.d a3, zero, 2, 0
+
+ addi.d a4, a2, -64
+ bgeu a3, a4, .Llt64
+
+ /* set 64 bytes at a time */
+.Lloop64:
+ st.d a1, a3, 0
+ st.d a1, a3, 8
+ st.d a1, a3, 16
+ st.d a1, a3, 24
+ st.d a1, a3, 32
+ st.d a1, a3, 40
+ st.d a1, a3, 48
+ st.d a1, a3, 56
+ addi.d a3, a3, 64
+ bltu a3, a4, .Lloop64
/* set the remaining bytes */
-2: st.b a1, a0, 0
- addi.d a0, a0, 1
- addi.d a2, a2, -1
- bgt a2, zero, 2b
+.Llt64:
+ addi.d a4, a2, -32
+ bgeu a3, a4, .Llt32
+ st.d a1, a3, 0
+ st.d a1, a3, 8
+ st.d a1, a3, 16
+ st.d a1, a3, 24
+ addi.d a3, a3, 32
+
+.Llt32:
+ addi.d a4, a2, -16
+ bgeu a3, a4, .Llt16
+ st.d a1, a3, 0
+ st.d a1, a3, 8
+ addi.d a3, a3, 16
+
+.Llt16:
+ addi.d a4, a2, -8
+ bgeu a3, a4, .Llt8
+ st.d a1, a3, 0
+
+.Llt8:
+ st.d a1, a2, -8
/* return */
-3: move a0, a3
+ jr ra
+
+ .align 4
+.Lsmall:
+ pcaddi t0, 4
+ slli.d a2, a2, 4
+ add.d t0, t0, a2
+ jr t0
+
+ .align 4
+0: jr ra
+
+ .align 4
+1: st.b a1, a0, 0
+ jr ra
+
+ .align 4
+2: st.h a1, a0, 0
+ jr ra
+
+ .align 4
+3: st.h a1, a0, 0
+ st.b a1, a0, 2
+ jr ra
+
+ .align 4
+4: st.w a1, a0, 0
+ jr ra
+
+ .align 4
+5: st.w a1, a0, 0
+ st.b a1, a0, 4
+ jr ra
+
+ .align 4
+6: st.w a1, a0, 0
+ st.h a1, a0, 4
+ jr ra
+
+ .align 4
+7: st.w a1, a0, 0
+ st.w a1, a0, 3
+ jr ra
+
+ .align 4
+8: st.d a1, a0, 0
jr ra
SYM_FUNC_END(__memset_fast)
_ASM_NOKPROBE(__memset_fast)
diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c
index e018aed34586..3b7d8129570b 100644
--- a/arch/loongarch/mm/init.c
+++ b/arch/loongarch/mm/init.c
@@ -41,7 +41,7 @@
* don't have to care about aliases on other CPUs.
*/
unsigned long empty_zero_page, zero_page_mask;
-EXPORT_SYMBOL_GPL(empty_zero_page);
+EXPORT_SYMBOL(empty_zero_page);
EXPORT_SYMBOL(zero_page_mask);
void setup_zero_pages(void)
@@ -270,7 +270,7 @@ pud_t invalid_pud_table[PTRS_PER_PUD] __page_aligned_bss;
#endif
#ifndef __PAGETABLE_PMD_FOLDED
pmd_t invalid_pmd_table[PTRS_PER_PMD] __page_aligned_bss;
-EXPORT_SYMBOL_GPL(invalid_pmd_table);
+EXPORT_SYMBOL(invalid_pmd_table);
#endif
pte_t invalid_pte_table[PTRS_PER_PTE] __page_aligned_bss;
EXPORT_SYMBOL(invalid_pte_table);
diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
index 288003a9f0ca..db9342b2d0e6 100644
--- a/arch/loongarch/net/bpf_jit.c
+++ b/arch/loongarch/net/bpf_jit.c
@@ -1022,6 +1022,10 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
emit_atomic(insn, ctx);
break;
+ /* Speculation barrier */
+ case BPF_ST | BPF_NOSPEC:
+ break;
+
default:
pr_err("bpf_jit: unknown opcode %02x\n", code);
return -EINVAL;
@@ -1248,3 +1252,9 @@ out:
return prog;
}
+
+/* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. */
+bool bpf_jit_supports_subprog_tailcalls(void)
+{
+ return true;
+}
diff --git a/arch/loongarch/power/suspend_asm.S b/arch/loongarch/power/suspend_asm.S
index 90da899c06a1..e2fc3b4e31f0 100644
--- a/arch/loongarch/power/suspend_asm.S
+++ b/arch/loongarch/power/suspend_asm.S
@@ -80,6 +80,10 @@ SYM_INNER_LABEL(loongarch_wakeup_start, SYM_L_GLOBAL)
JUMP_VIRT_ADDR t0, t1
+ /* Enable PG */
+ li.w t0, 0xb0 # PLV=0, IE=0, PG=1
+ csrwr t0, LOONGARCH_CSR_CRMD
+
la.pcrel t0, acpi_saved_sp
ld.d sp, t0, 0
SETUP_WAKEUP
diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile
index d89e2ac75f7b..461240ab4436 100644
--- a/arch/loongarch/vdso/Makefile
+++ b/arch/loongarch/vdso/Makefile
@@ -1,9 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
# Objects to go into the VDSO.
-# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before
-# the inclusion of generic Makefile.
-ARCH_REL_TYPE_ABS := R_LARCH_32|R_LARCH_64|R_LARCH_MARK_LA|R_LARCH_JUMP_SLOT
+# Include the generic Makefile to check the built vdso.
include $(srctree)/lib/vdso/Makefile
obj-vdso-y := elf.o vgetcpu.o vgettimeofday.o sigreturn.o