diff options
author | Christophe Leroy | 2018-05-24 11:22:27 +0000 |
---|---|---|
committer | Michael Ellerman | 2018-06-04 00:39:19 +1000 |
commit | 373e098e1e788d7b89ec0f31765a6c08e2ea0f7c (patch) | |
tree | 4c4b689c05300a7fdf7e1494c1f72c17afa9e62f /arch/powerpc/lib | |
parent | 1128bb7813a896bd608fb622eee3c26aaf33b473 (diff) |
powerpc/32: Optimise __csum_partial()
Improve __csum_partial by interleaving loads and adds.
On a 8xx, it brings neither improvement nor degradation.
On a 83xx, it brings a 25% improvement.
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Reviewed-by: Segher Boessenkool <segher@kernel.crashing.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Diffstat (limited to 'arch/powerpc/lib')
-rw-r--r-- | arch/powerpc/lib/checksum_32.S | 13 |
1 files changed, 11 insertions, 2 deletions
diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S index 9a671c774b22..422d66e938e3 100644 --- a/arch/powerpc/lib/checksum_32.S +++ b/arch/powerpc/lib/checksum_32.S @@ -47,16 +47,25 @@ _GLOBAL(__csum_partial) bdnz 2b 21: srwi. r6,r4,4 /* # blocks of 4 words to do */ beq 3f + lwz r0,4(r3) mtctr r6 -22: lwz r0,4(r3) lwz r6,8(r3) + adde r5,r5,r0 lwz r7,12(r3) + adde r5,r5,r6 lwzu r8,16(r3) + adde r5,r5,r7 + bdz 23f +22: lwz r0,4(r3) + adde r5,r5,r8 + lwz r6,8(r3) adde r5,r5,r0 + lwz r7,12(r3) adde r5,r5,r6 + lwzu r8,16(r3) adde r5,r5,r7 - adde r5,r5,r8 bdnz 22b +23: adde r5,r5,r8 3: andi. r0,r4,2 beq+ 4f lhz r0,4(r3) |