aboutsummaryrefslogtreecommitdiff
path: root/arch/powerpc
diff options
context:
space:
mode:
authorChristophe Leroy2018-05-30 07:06:15 +0000
committerMichael Ellerman2018-06-04 00:39:21 +1000
commit2676b89eb82f56825d7289edf21f17a6461388b7 (patch)
tree88beb79699ed2916ad1d9082613bc11ce792fbd3 /arch/powerpc
parentf36bbf21e8b911b3c629fd36d4d217105b47a20e (diff)
powerpc/lib: optimise PPC32 memcmp
At the time being, memcmp() compares two chunks of memory byte per byte. This patch optimises the comparison by comparing word by word. On the same way as commit 15c2d45d17418 ("powerpc: Add 64bit optimised memcmp"), this patch moves memcmp() into a dedicated file named memcmp_32.S A small benchmark performed on an 8xx comparing two chuncks of 512 bytes performed 100000 times gives: Before : 5852274 TB ticks After: 1488638 TB ticks This is almost 4 times faster Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Diffstat (limited to 'arch/powerpc')
-rw-r--r--arch/powerpc/lib/Makefile4
-rw-r--r--arch/powerpc/lib/memcmp_32.S45
-rw-r--r--arch/powerpc/lib/string.S17
3 files changed, 47 insertions, 19 deletions
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 2c9b8c0adf22..d0ca13ad8231 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -26,14 +26,14 @@ obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \
memcpy_power7.o
obj64-y += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \
- memcpy_64.o memcmp_64.o pmem.o
+ memcpy_64.o pmem.o
obj64-$(CONFIG_SMP) += locks.o
obj64-$(CONFIG_ALTIVEC) += vmx-helper.o
obj64-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o
obj-y += checksum_$(BITS).o checksum_wrappers.o \
- string_$(BITS).o
+ string_$(BITS).o memcmp_$(BITS).o
obj-y += sstep.o ldstfp.o quad.o
obj64-y += quad.o
diff --git a/arch/powerpc/lib/memcmp_32.S b/arch/powerpc/lib/memcmp_32.S
new file mode 100644
index 000000000000..5010e376f7b8
--- /dev/null
+++ b/arch/powerpc/lib/memcmp_32.S
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * memcmp for PowerPC32
+ *
+ * Copyright (C) 1996 Paul Mackerras.
+ *
+ */
+
+#include <asm/ppc_asm.h>
+#include <asm/export.h>
+
+ .text
+
+_GLOBAL(memcmp)
+ srawi. r7, r5, 2 /* Divide len by 4 */
+ mr r6, r3
+ beq- 3f
+ mtctr r7
+ li r7, 0
+1: lwzx r3, r6, r7
+ lwzx r0, r4, r7
+ addi r7, r7, 4
+ cmplw cr0, r3, r0
+ bdnzt eq, 1b
+ bne 5f
+3: andi. r3, r5, 3
+ beqlr
+ cmplwi cr1, r3, 2
+ blt- cr1, 4f
+ lhzx r3, r6, r7
+ lhzx r0, r4, r7
+ addi r7, r7, 2
+ subf. r3, r0, r3
+ beqlr cr1
+ bnelr
+4: lbzx r3, r6, r7
+ lbzx r0, r4, r7
+ subf. r3, r0, r3
+ blr
+5: li r3, 1
+ bgtlr
+ li r3, -1
+ blr
+EXPORT_SYMBOL(memcmp)
diff --git a/arch/powerpc/lib/string.S b/arch/powerpc/lib/string.S
index 5343a88e619e..4b41970e9ed8 100644
--- a/arch/powerpc/lib/string.S
+++ b/arch/powerpc/lib/string.S
@@ -54,23 +54,6 @@ _GLOBAL(strncmp)
blr
EXPORT_SYMBOL(strncmp)
-#ifdef CONFIG_PPC32
-_GLOBAL(memcmp)
- PPC_LCMPI 0,r5,0
- beq- 2f
- mtctr r5
- addi r6,r3,-1
- addi r4,r4,-1
-1: lbzu r3,1(r6)
- lbzu r0,1(r4)
- subf. r3,r0,r3
- bdnzt 2,1b
- blr
-2: li r3,0
- blr
-EXPORT_SYMBOL(memcmp)
-#endif
-
_GLOBAL(memchr)
PPC_LCMPI 0,r5,0
beq- 2f