crypto: arm64/aes-ce - Simplify round key load sequence
Tweak the round key logic so that the round keys can be loaded using a single branchless sequence of overlapping loads. This is shorter and simpler, and puts the conditional branches based on the key size further apart, which might benefit microarchitectures that cannot record taken branches at every instruction. For these branches, use test-bit-branch instructions that don't clobber the condition flags.

Note that none of this has any impact on performance, positive or otherwise (and the branch prediction benefit would only apply to AES-192, which nobody uses). It does make for nicer code, though.

While at it, use \@ to generate the labels inside the macros, which is more robust than using fixed numbers, which could clash inadvertently.

Also, bring aes-neon.S in line with these changes, including the switch to test-and-branch instructions, to avoid surprises in the future when we might start relying on the condition flags being preserved in the chaining mode wrappers in aes-modes.S.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
This commit is contained in:
parent
3f4d1482da
commit
571e557cba
@ -25,33 +25,28 @@
|
||||
.endm
|
||||
|
||||
/* preload all round keys */
|
||||
.macro load_round_keys, rounds, rk
|
||||
cmp \rounds, #12
|
||||
blo 2222f /* 128 bits */
|
||||
beq 1111f /* 192 bits */
|
||||
ld1 {v17.4s-v18.4s}, [\rk], #32
|
||||
1111: ld1 {v19.4s-v20.4s}, [\rk], #32
|
||||
2222: ld1 {v21.4s-v24.4s}, [\rk], #64
|
||||
ld1 {v25.4s-v28.4s}, [\rk], #64
|
||||
ld1 {v29.4s-v31.4s}, [\rk]
|
||||
.macro load_round_keys, rk, nr, tmp
|
||||
add \tmp, \rk, \nr, sxtw #4
|
||||
sub \tmp, \tmp, #160
|
||||
ld1 {v17.4s-v20.4s}, [\rk]
|
||||
ld1 {v21.4s-v24.4s}, [\tmp], #64
|
||||
ld1 {v25.4s-v28.4s}, [\tmp], #64
|
||||
ld1 {v29.4s-v31.4s}, [\tmp]
|
||||
.endm
|
||||
|
||||
/* prepare for encryption with key in rk[] */
|
||||
.macro enc_prepare, rounds, rk, temp
|
||||
mov \temp, \rk
|
||||
load_round_keys \rounds, \temp
|
||||
load_round_keys \rk, \rounds, \temp
|
||||
.endm
|
||||
|
||||
/* prepare for encryption (again) but with new key in rk[] */
|
||||
.macro enc_switch_key, rounds, rk, temp
|
||||
mov \temp, \rk
|
||||
load_round_keys \rounds, \temp
|
||||
load_round_keys \rk, \rounds, \temp
|
||||
.endm
|
||||
|
||||
/* prepare for decryption with key in rk[] */
|
||||
.macro dec_prepare, rounds, rk, temp
|
||||
mov \temp, \rk
|
||||
load_round_keys \rounds, \temp
|
||||
load_round_keys \rk, \rounds, \temp
|
||||
.endm
|
||||
|
||||
.macro do_enc_Nx, de, mc, k, i0, i1, i2, i3, i4
|
||||
@ -110,14 +105,13 @@
|
||||
|
||||
/* up to 5 interleaved blocks */
|
||||
.macro do_block_Nx, enc, rounds, i0, i1, i2, i3, i4
|
||||
cmp \rounds, #12
|
||||
blo 2222f /* 128 bits */
|
||||
beq 1111f /* 192 bits */
|
||||
tbz \rounds, #2, .L\@ /* 128 bits */
|
||||
round_Nx \enc, v17, \i0, \i1, \i2, \i3, \i4
|
||||
round_Nx \enc, v18, \i0, \i1, \i2, \i3, \i4
|
||||
1111: round_Nx \enc, v19, \i0, \i1, \i2, \i3, \i4
|
||||
tbz \rounds, #1, .L\@ /* 192 bits */
|
||||
round_Nx \enc, v19, \i0, \i1, \i2, \i3, \i4
|
||||
round_Nx \enc, v20, \i0, \i1, \i2, \i3, \i4
|
||||
2222: .irp key, v21, v22, v23, v24, v25, v26, v27, v28, v29
|
||||
.L\@: .irp key, v21, v22, v23, v24, v25, v26, v27, v28, v29
|
||||
round_Nx \enc, \key, \i0, \i1, \i2, \i3, \i4
|
||||
.endr
|
||||
fin_round_Nx \enc, v30, v31, \i0, \i1, \i2, \i3, \i4
|
||||
|
@ -99,16 +99,16 @@
|
||||
ld1 {v15.4s}, [\rk]
|
||||
add \rkp, \rk, #16
|
||||
mov \i, \rounds
|
||||
1111: eor \in\().16b, \in\().16b, v15.16b /* ^round key */
|
||||
.La\@: eor \in\().16b, \in\().16b, v15.16b /* ^round key */
|
||||
movi v15.16b, #0x40
|
||||
tbl \in\().16b, {\in\().16b}, v13.16b /* ShiftRows */
|
||||
sub_bytes \in
|
||||
subs \i, \i, #1
|
||||
sub \i, \i, #1
|
||||
ld1 {v15.4s}, [\rkp], #16
|
||||
beq 2222f
|
||||
cbz \i, .Lb\@
|
||||
mix_columns \in, \enc
|
||||
b 1111b
|
||||
2222: eor \in\().16b, \in\().16b, v15.16b /* ^round key */
|
||||
b .La\@
|
||||
.Lb\@: eor \in\().16b, \in\().16b, v15.16b /* ^round key */
|
||||
.endm
|
||||
|
||||
.macro encrypt_block, in, rounds, rk, rkp, i
|
||||
@ -206,7 +206,7 @@
|
||||
ld1 {v15.4s}, [\rk]
|
||||
add \rkp, \rk, #16
|
||||
mov \i, \rounds
|
||||
1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
|
||||
.La\@: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
|
||||
eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
|
||||
eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */
|
||||
eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */
|
||||
@ -216,13 +216,13 @@
|
||||
tbl \in2\().16b, {\in2\().16b}, v13.16b /* ShiftRows */
|
||||
tbl \in3\().16b, {\in3\().16b}, v13.16b /* ShiftRows */
|
||||
sub_bytes_4x \in0, \in1, \in2, \in3
|
||||
subs \i, \i, #1
|
||||
sub \i, \i, #1
|
||||
ld1 {v15.4s}, [\rkp], #16
|
||||
beq 2222f
|
||||
cbz \i, .Lb\@
|
||||
mix_columns_2x \in0, \in1, \enc
|
||||
mix_columns_2x \in2, \in3, \enc
|
||||
b 1111b
|
||||
2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
|
||||
b .La\@
|
||||
.Lb\@: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
|
||||
eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
|
||||
eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */
|
||||
eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */
|
||||
|
Loading…
Reference in New Issue
Block a user