aboutsummaryrefslogtreecommitdiff
path: root/arch/powerpc
diff options
context:
space:
mode:
authorChristophe Leroy2018-05-24 11:22:27 +0000
committerMichael Ellerman2018-06-04 00:39:19 +1000
commit373e098e1e788d7b89ec0f31765a6c08e2ea0f7c (patch)
tree4c4b689c05300a7fdf7e1494c1f72c17afa9e62f /arch/powerpc
parent1128bb7813a896bd608fb622eee3c26aaf33b473 (diff)
powerpc/32: Optimise __csum_partial()
Improve __csum_partial by interleaving loads and adds. On a 8xx, it brings neither improvement nor degradation. On a 83xx, it brings a 25% improvement. Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Reviewed-by: Segher Boessenkool <segher@kernel.crashing.org> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Diffstat (limited to 'arch/powerpc')
-rw-r--r--arch/powerpc/lib/checksum_32.S13
1 files changed, 11 insertions, 2 deletions
diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S
index 9a671c774b22..422d66e938e3 100644
--- a/arch/powerpc/lib/checksum_32.S
+++ b/arch/powerpc/lib/checksum_32.S
@@ -47,16 +47,25 @@ _GLOBAL(__csum_partial)
bdnz 2b
21: srwi. r6,r4,4 /* # blocks of 4 words to do */
beq 3f
+ lwz r0,4(r3)
mtctr r6
-22: lwz r0,4(r3)
lwz r6,8(r3)
+ adde r5,r5,r0
lwz r7,12(r3)
+ adde r5,r5,r6
lwzu r8,16(r3)
+ adde r5,r5,r7
+ bdz 23f
+22: lwz r0,4(r3)
+ adde r5,r5,r8
+ lwz r6,8(r3)
adde r5,r5,r0
+ lwz r7,12(r3)
adde r5,r5,r6
+ lwzu r8,16(r3)
adde r5,r5,r7
- adde r5,r5,r8
bdnz 22b
+23: adde r5,r5,r8
3: andi. r0,r4,2
beq+ 4f
lhz r0,4(r3)