aboutsummaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
authorEric Biggers2024-04-12 20:17:27 -0700
committerHerbert Xu2024-04-19 18:54:19 +0800
commite619723a857dfdcf0050713f12b3916816cd8d12 (patch)
tree6636a75023ac0344fde3e7696872b4ec691272cf /arch/x86
parent2717e01fc3fb4d37b625b9bd6cf161d0d9d5c4b5 (diff)
crypto: x86/aes-xts - eliminate a few more instructions
- For conditionally subtracting 16 from LEN when decrypting a message whose length isn't a multiple of 16, use the cmovnz instruction. - Fold the addition of 4*VL to LEN into the sub of VL or 16 from LEN. - Remove an unnecessary test instruction. This results in slightly shorter code, both source and binary. Signed-off-by: Eric Biggers <ebiggers@google.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/crypto/aes-xts-avx-x86_64.S39
1 files changed, 13 insertions, 26 deletions
diff --git a/arch/x86/crypto/aes-xts-avx-x86_64.S b/arch/x86/crypto/aes-xts-avx-x86_64.S
index f5e7ab739105..802d3b90d337 100644
--- a/arch/x86/crypto/aes-xts-avx-x86_64.S
+++ b/arch/x86/crypto/aes-xts-avx-x86_64.S
@@ -559,20 +559,20 @@
.macro _aes_xts_crypt enc
_define_aliases
- // Load the AES key length: 16 (AES-128), 24 (AES-192), or 32 (AES-256).
- movl 480(KEY), KEYLEN
-
.if !\enc
// When decrypting a message whose length isn't a multiple of the AES
// block length, exclude the last full block from the main loop by
// subtracting 16 from LEN. This is needed because ciphertext stealing
// decryption uses the last two tweaks in reverse order. We'll handle
// the last full block and the partial block specially at the end.
+ lea -16(LEN), %rax
test $15, LEN
- jnz .Lneed_cts_dec\@
-.Lxts_init\@:
+ cmovnz %rax, LEN
.endif
+ // Load the AES key length: 16 (AES-128), 24 (AES-192), or 32 (AES-256).
+ movl 480(KEY), KEYLEN
+
// Setup the pointer to the round keys and cache as many as possible.
_setup_round_keys \enc
@@ -661,11 +661,10 @@
RET
.Lhandle_remainder\@:
- add $4*VL, LEN // Undo the extra sub from earlier.
// En/decrypt any remaining full blocks, one vector at a time.
.if VL > 16
- sub $VL, LEN
+ add $3*VL, LEN // Undo extra sub of 4*VL, then sub VL.
jl .Lvec_at_a_time_done\@
.Lvec_at_a_time\@:
_vmovdqu (SRC), V0
@@ -677,9 +676,9 @@
sub $VL, LEN
jge .Lvec_at_a_time\@
.Lvec_at_a_time_done\@:
- add $VL-16, LEN // Undo the extra sub from earlier.
+ add $VL-16, LEN // Undo extra sub of VL, then sub 16.
.else
- sub $16, LEN
+ add $4*VL-16, LEN // Undo extra sub of 4*VL, then sub 16.
.endif
// En/decrypt any remaining full blocks, one at a time.
@@ -694,24 +693,12 @@
sub $16, LEN
jge .Lblock_at_a_time\@
.Lblock_at_a_time_done\@:
- add $16, LEN // Undo the extra sub from earlier.
-
-.Lfull_blocks_done\@:
- // Now 0 <= LEN <= 15. If LEN is nonzero, do ciphertext stealing to
- // process the last 16 + LEN bytes. If LEN is zero, we're done.
- test LEN, LEN
- jnz .Lcts\@
- jmp .Ldone\@
-
-.if !\enc
-.Lneed_cts_dec\@:
- sub $16, LEN
- jmp .Lxts_init\@
-.endif
+ add $16, LEN // Undo the extra sub of 16.
+ // Now 0 <= LEN <= 15. If LEN is zero, we're done.
+ jz .Ldone\@
-.Lcts\@:
- // Do ciphertext stealing (CTS) to en/decrypt the last full block and
- // the partial block. TWEAK0_XMM contains the next tweak.
+ // Otherwise 1 <= LEN <= 15, but the real remaining length is 16 + LEN.
+ // Do ciphertext stealing to process the last 16 + LEN bytes.
.if \enc
// If encrypting, the main loop already encrypted the last full block to