mirror of
https://github.com/jedisct1/libsodium.git
synced 2024-12-19 10:05:05 -07:00
Reorder AES ops on ARM
This commit is contained in:
parent
a3c44aba94
commit
6187ebc10f
@ -32,11 +32,11 @@ typedef uint64x2_t BlockVec;
|
||||
|
||||
#define LOAD128(a) vld1q_u64((const void *) a)
|
||||
#define STORE128(a, b) vst1q_u64(((void *) a), (b))
|
||||
#define AES_ENCRYPT(block_vec, rkey) \
|
||||
#define AES_XENCRYPT(block_vec, rkey) \
|
||||
vreinterpretq_u64_u8( \
|
||||
veorq_u8(vaesmcq_u8(vaeseq_u8(vreinterpretq_u8_u64(block_vec), vmovq_n_u8(0))), rkey))
|
||||
#define AES_ENCRYPTLAST(block_vec, rkey) \
|
||||
vreinterpretq_u64_u8(veorq_u8(vaeseq_u8(vreinterpretq_u8_u64(block_vec), vmovq_n_u8(0)), rkey))
|
||||
vaesmcq_u8(vaeseq_u8(vreinterpretq_u8_u64(block_vec), rkey)))
|
||||
#define AES_XENCRYPTLAST(block_vec, rkey) \
|
||||
vreinterpretq_u64_u8(vaeseq_u8(vreinterpretq_u8_u64(block_vec), rkey))
|
||||
#define XOR128(a, b) veorq_u64((a), (b))
|
||||
#define AND128(a, b) vandq_u64((a), (b))
|
||||
#define OR128(a, b) vorrq_u64((a), (b))
|
||||
@ -148,11 +148,12 @@ encrypt(const State *st, unsigned char dst[16], const unsigned char src[16])
|
||||
|
||||
size_t i;
|
||||
|
||||
t = XOR128(LOAD128(src), st->rkeys[0]);
|
||||
for (i = 1; i < ROUNDS; i++) {
|
||||
t = AES_ENCRYPT(t, st->rkeys[i]);
|
||||
t = AES_XENCRYPT(LOAD128(src), st->rkeys[0]);
|
||||
for (i = 1; i < ROUNDS - 1; i++) {
|
||||
t = AES_XENCRYPT(t, st->rkeys[i]);
|
||||
}
|
||||
t = AES_ENCRYPTLAST(t, st->rkeys[ROUNDS]);
|
||||
t = AES_XENCRYPTLAST(t, st->rkeys[i]);
|
||||
t = XOR128(t, st->rkeys[ROUNDS]);
|
||||
STORE128(dst, t);
|
||||
}
|
||||
|
||||
@ -165,12 +166,12 @@ static inline void __vectorcall encrypt_xor_block(const State *st, unsigned char
|
||||
BlockVec ts;
|
||||
size_t i;
|
||||
|
||||
ts = XOR128(counter, st->rkeys[0]);
|
||||
for (i = 1; i < ROUNDS; i++) {
|
||||
ts = AES_ENCRYPT(ts, st->rkeys[i]);
|
||||
ts = AES_XENCRYPT(counter, st->rkeys[0]);
|
||||
for (i = 1; i < ROUNDS - 1; i++) {
|
||||
ts = AES_XENCRYPT(ts, st->rkeys[i]);
|
||||
}
|
||||
ts = AES_ENCRYPTLAST(ts, st->rkeys[i]);
|
||||
ts = XOR128(ts, LOAD128(src));
|
||||
ts = AES_XENCRYPTLAST(ts, st->rkeys[i]);
|
||||
ts = XOR128(ts, XOR128(st->rkeys[ROUNDS], LOAD128(src)));
|
||||
STORE128(dst, ts);
|
||||
}
|
||||
|
||||
@ -185,16 +186,16 @@ static inline void __vectorcall encrypt_xor_wide(const State *st,
|
||||
size_t i, j;
|
||||
|
||||
for (j = 0; j < PARALLEL_BLOCKS; j++) {
|
||||
ts[j] = XOR128(counters[j], st->rkeys[0]);
|
||||
ts[j] = AES_XENCRYPT(counters[j], st->rkeys[0]);
|
||||
}
|
||||
for (i = 1; i < ROUNDS; i++) {
|
||||
for (i = 1; i < ROUNDS - 1; i++) {
|
||||
for (j = 0; j < PARALLEL_BLOCKS; j++) {
|
||||
ts[j] = AES_ENCRYPT(ts[j], st->rkeys[i]);
|
||||
ts[j] = AES_XENCRYPT(ts[j], st->rkeys[i]);
|
||||
}
|
||||
}
|
||||
for (j = 0; j < PARALLEL_BLOCKS; j++) {
|
||||
ts[j] = AES_ENCRYPTLAST(ts[j], st->rkeys[i]);
|
||||
ts[j] = XOR128(ts[j], LOAD128(&src[16 * j]));
|
||||
ts[j] = AES_XENCRYPTLAST(ts[j], st->rkeys[i]);
|
||||
ts[j] = XOR128(ts[j], XOR128(st->rkeys[ROUNDS], LOAD128(&src[16 * j])));
|
||||
}
|
||||
for (j = 0; j < PARALLEL_BLOCKS; j++) {
|
||||
STORE128(&dst[16 * j], ts[j]);
|
||||
|
Loading…
Reference in New Issue
Block a user