1
mirror of https://github.com/jedisct1/libsodium.git synced 2024-12-19 10:05:05 -07:00

Reorder AES ops on ARM

This commit is contained in:
Frank Denis 2023-06-19 12:34:10 +02:00
parent a3c44aba94
commit 6187ebc10f

View File

@@ -32,11 +32,11 @@ typedef uint64x2_t BlockVec;
 
 #define LOAD128(a) vld1q_u64((const void *) a)
 #define STORE128(a, b) vst1q_u64(((void *) a), (b))
-#define AES_ENCRYPT(block_vec, rkey) \
+#define AES_XENCRYPT(block_vec, rkey) \
     vreinterpretq_u64_u8( \
-        veorq_u8(vaesmcq_u8(vaeseq_u8(vreinterpretq_u8_u64(block_vec), vmovq_n_u8(0))), rkey))
-#define AES_ENCRYPTLAST(block_vec, rkey) \
-    vreinterpretq_u64_u8(veorq_u8(vaeseq_u8(vreinterpretq_u8_u64(block_vec), vmovq_n_u8(0)), rkey))
+        vaesmcq_u8(vaeseq_u8(vreinterpretq_u8_u64(block_vec), rkey)))
+#define AES_XENCRYPTLAST(block_vec, rkey) \
+    vreinterpretq_u64_u8(vaeseq_u8(vreinterpretq_u8_u64(block_vec), rkey))
 #define XOR128(a, b) veorq_u64((a), (b))
 #define AND128(a, b) vandq_u64((a), (b))
 #define OR128(a, b) vorrq_u64((a), (b))
@@ -148,11 +148,12 @@ encrypt(const State *st, unsigned char dst[16], const unsigned char src[16])
 
     size_t i;
 
-    t = XOR128(LOAD128(src), st->rkeys[0]);
-    for (i = 1; i < ROUNDS; i++) {
-        t = AES_ENCRYPT(t, st->rkeys[i]);
+    t = AES_XENCRYPT(LOAD128(src), st->rkeys[0]);
+    for (i = 1; i < ROUNDS - 1; i++) {
+        t = AES_XENCRYPT(t, st->rkeys[i]);
     }
-    t = AES_ENCRYPTLAST(t, st->rkeys[ROUNDS]);
+    t = AES_XENCRYPTLAST(t, st->rkeys[i]);
+    t = XOR128(t, st->rkeys[ROUNDS]);
     STORE128(dst, t);
 }
 
@@ -165,12 +166,12 @@ static inline void __vectorcall encrypt_xor_block(const State *st, unsigned char
     BlockVec ts;
     size_t i;
 
-    ts = XOR128(counter, st->rkeys[0]);
-    for (i = 1; i < ROUNDS; i++) {
-        ts = AES_ENCRYPT(ts, st->rkeys[i]);
+    ts = AES_XENCRYPT(counter, st->rkeys[0]);
+    for (i = 1; i < ROUNDS - 1; i++) {
+        ts = AES_XENCRYPT(ts, st->rkeys[i]);
     }
-    ts = AES_ENCRYPTLAST(ts, st->rkeys[i]);
-    ts = XOR128(ts, LOAD128(src));
+    ts = AES_XENCRYPTLAST(ts, st->rkeys[i]);
+    ts = XOR128(ts, XOR128(st->rkeys[ROUNDS], LOAD128(src)));
     STORE128(dst, ts);
 }
 
@@ -185,16 +186,16 @@ static inline void __vectorcall encrypt_xor_wide(const State *st,
     size_t i, j;
 
     for (j = 0; j < PARALLEL_BLOCKS; j++) {
-        ts[j] = XOR128(counters[j], st->rkeys[0]);
+        ts[j] = AES_XENCRYPT(counters[j], st->rkeys[0]);
     }
-    for (i = 1; i < ROUNDS; i++) {
+    for (i = 1; i < ROUNDS - 1; i++) {
         for (j = 0; j < PARALLEL_BLOCKS; j++) {
-            ts[j] = AES_ENCRYPT(ts[j], st->rkeys[i]);
+            ts[j] = AES_XENCRYPT(ts[j], st->rkeys[i]);
         }
     }
     for (j = 0; j < PARALLEL_BLOCKS; j++) {
-        ts[j] = AES_ENCRYPTLAST(ts[j], st->rkeys[i]);
-        ts[j] = XOR128(ts[j], LOAD128(&src[16 * j]));
+        ts[j] = AES_XENCRYPTLAST(ts[j], st->rkeys[i]);
+        ts[j] = XOR128(ts[j], XOR128(st->rkeys[ROUNDS], LOAD128(&src[16 * j])));
     }
     for (j = 0; j < PARALLEL_BLOCKS; j++) {
         STORE128(&dst[16 * j], ts[j]);