From a67325c42c02161f27c3af6dd476e55843c5f9ad Mon Sep 17 00:00:00 2001
From: Frank Denis
Date: Sat, 25 May 2024 00:41:08 +0200
Subject: [PATCH] AEGIS: improve performance of AD absorption on x86_64

No apparent regression on other platforms. Adapted from libaegis.
---
 .../crypto_aead/aegis128l/aegis128l_common.h  | 23 +++++++++++++++++--
 .../crypto_aead/aegis256/aegis256_common.h    | 21 +++++++++++++++--
 2 files changed, 40 insertions(+), 4 deletions(-)

diff --git a/src/libsodium/crypto_aead/aegis128l/aegis128l_common.h b/src/libsodium/crypto_aead/aegis128l/aegis128l_common.h
index 1683d76f..6e503dc3 100644
--- a/src/libsodium/crypto_aead/aegis128l/aegis128l_common.h
+++ b/src/libsodium/crypto_aead/aegis128l/aegis128l_common.h
@@ -74,6 +74,19 @@ aegis128l_absorb(const uint8_t *const src, aes_block_t *const state)
     aegis128l_update(state, msg0, msg1);
 }
 
+static inline void
+aegis128l_absorb2(const uint8_t *const src, aes_block_t *const state)
+{
+    aes_block_t msg0, msg1, msg2, msg3;
+
+    msg0 = AES_BLOCK_LOAD(src + 0 * AES_BLOCK_LENGTH);
+    msg1 = AES_BLOCK_LOAD(src + 1 * AES_BLOCK_LENGTH);
+    msg2 = AES_BLOCK_LOAD(src + 2 * AES_BLOCK_LENGTH);
+    msg3 = AES_BLOCK_LOAD(src + 3 * AES_BLOCK_LENGTH);
+    aegis128l_update(state, msg0, msg1);
+    aegis128l_update(state, msg2, msg3);
+}
+
 static void
 aegis128l_enc(uint8_t *const dst, const uint8_t *const src, aes_block_t *const state)
 {
@@ -154,7 +167,10 @@ encrypt_detached(uint8_t *c, uint8_t *mac, size_t maclen, const uint8_t *m, size
 
     aegis128l_init(k, npub, state);
 
-    for (i = 0; i + RATE <= adlen; i += RATE) {
+    for (i = 0; i + RATE * 2 <= adlen; i += RATE * 2) {
+        aegis128l_absorb2(ad + i, state);
+    }
+    for (; i + RATE <= adlen; i += RATE) {
         aegis128l_absorb(ad + i, state);
     }
     if (adlen % RATE) {
@@ -189,7 +205,10 @@ decrypt_detached(uint8_t *m, const uint8_t *c, size_t clen, const uint8_t *mac,
 
     aegis128l_init(k, npub, state);
 
-    for (i = 0; i + RATE <= adlen; i += RATE) {
+    for (i = 0; i + RATE * 2 <= adlen; i += RATE * 2) {
+        aegis128l_absorb2(ad + i, state);
+    }
+    for (; i + RATE <= adlen; i += RATE) {
         aegis128l_absorb(ad + i, state);
     }
     if (adlen % RATE) {
diff --git a/src/libsodium/crypto_aead/aegis256/aegis256_common.h b/src/libsodium/crypto_aead/aegis256/aegis256_common.h
index 17c2c415..adf837a9 100644
--- a/src/libsodium/crypto_aead/aegis256/aegis256_common.h
+++ b/src/libsodium/crypto_aead/aegis256/aegis256_common.h
@@ -73,6 +73,17 @@ aegis256_absorb(const uint8_t *const src, aes_block_t *const state)
     aegis256_update(state, msg);
 }
 
+static inline void
+aegis256_absorb2(const uint8_t *const src, aes_block_t *const state)
+{
+    aes_block_t msg, msg2;
+
+    msg  = AES_BLOCK_LOAD(src + 0 * AES_BLOCK_LENGTH);
+    msg2 = AES_BLOCK_LOAD(src + 1 * AES_BLOCK_LENGTH);
+    aegis256_update(state, msg);
+    aegis256_update(state, msg2);
+}
+
 static void
 aegis256_enc(uint8_t *const dst, const uint8_t *const src, aes_block_t *const state)
 {
@@ -139,7 +150,10 @@ encrypt_detached(uint8_t *c, uint8_t *mac, size_t maclen, const uint8_t *m, size
 
     aegis256_init(k, npub, state);
 
-    for (i = 0; i + RATE <= adlen; i += RATE) {
+    for (i = 0; i + 2 * RATE <= adlen; i += 2 * RATE) {
+        aegis256_absorb2(ad + i, state);
+    }
+    for (; i + RATE <= adlen; i += RATE) {
         aegis256_absorb(ad + i, state);
     }
     if (adlen % RATE) {
@@ -174,7 +188,10 @@ decrypt_detached(uint8_t *m, const uint8_t *c, size_t clen, const uint8_t *mac,
 
     aegis256_init(k, npub, state);
 
-    for (i = 0; i + RATE <= adlen; i += RATE) {
+    for (i = 0; i + 2 * RATE <= adlen; i += 2 * RATE) {
+        aegis256_absorb2(ad + i, state);
+    }
+    for (; i + RATE <= adlen; i += RATE) {
         aegis256_absorb(ad + i, state);
     }
     if (adlen % RATE) {