From 373e098e1e788d7b89ec0f31765a6c08e2ea0f7c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 24 May 2018 11:22:27 +0000 Subject: powerpc/32: Optimise __csum_partial() Improve __csum_partial by interleaving loads and adds. On a 8xx, it brings neither improvement nor degradation. On a 83xx, it brings a 25% improvement. Signed-off-by: Christophe Leroy Reviewed-by: Segher Boessenkool Signed-off-by: Michael Ellerman --- arch/powerpc/lib/checksum_32.S | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S index 9a671c774b22..422d66e938e3 100644 --- a/arch/powerpc/lib/checksum_32.S +++ b/arch/powerpc/lib/checksum_32.S @@ -47,16 +47,25 @@ _GLOBAL(__csum_partial) bdnz 2b 21: srwi. r6,r4,4 /* # blocks of 4 words to do */ beq 3f + lwz r0,4(r3) mtctr r6 -22: lwz r0,4(r3) lwz r6,8(r3) + adde r5,r5,r0 lwz r7,12(r3) + adde r5,r5,r6 lwzu r8,16(r3) + adde r5,r5,r7 + bdz 23f +22: lwz r0,4(r3) + adde r5,r5,r8 + lwz r6,8(r3) adde r5,r5,r0 + lwz r7,12(r3) adde r5,r5,r6 + lwzu r8,16(r3) adde r5,r5,r7 - adde r5,r5,r8 bdnz 22b +23: adde r5,r5,r8 3: andi. r0,r4,2 beq+ 4f lhz r0,4(r3) -- cgit v1.2.3