diff --git a/builds/msvc/vs2010/libsodium/libsodium.vcxproj b/builds/msvc/vs2010/libsodium/libsodium.vcxproj index ab8bcb0a..db42c9b2 100644 --- a/builds/msvc/vs2010/libsodium/libsodium.vcxproj +++ b/builds/msvc/vs2010/libsodium/libsodium.vcxproj @@ -180,7 +180,9 @@ + + diff --git a/builds/msvc/vs2010/libsodium/libsodium.vcxproj.filters b/builds/msvc/vs2010/libsodium/libsodium.vcxproj.filters index 363a53bc..cd37679d 100644 --- a/builds/msvc/vs2010/libsodium/libsodium.vcxproj.filters +++ b/builds/msvc/vs2010/libsodium/libsodium.vcxproj.filters @@ -333,9 +333,15 @@ crypto_aead\aegis256\armcrypto + + crypto_aead\aes256gcm + crypto_aead\aes256gcm\aesni + + crypto_aead\aes256gcm\armcrypto + crypto_aead\chacha20poly1305\sodium @@ -802,6 +808,9 @@ {7eb51140-a50f-3f50-b379-83677a82496c} + + {507bb9aa-b65c-3034-946b-bcd375b7deaa} + {1f4d6dd1-517f-3eeb-b974-2304ada5e67a} diff --git a/builds/msvc/vs2012/libsodium/libsodium.vcxproj b/builds/msvc/vs2012/libsodium/libsodium.vcxproj index f0210427..14c0ca22 100644 --- a/builds/msvc/vs2012/libsodium/libsodium.vcxproj +++ b/builds/msvc/vs2012/libsodium/libsodium.vcxproj @@ -180,7 +180,9 @@ + + diff --git a/builds/msvc/vs2012/libsodium/libsodium.vcxproj.filters b/builds/msvc/vs2012/libsodium/libsodium.vcxproj.filters index 363a53bc..cd37679d 100644 --- a/builds/msvc/vs2012/libsodium/libsodium.vcxproj.filters +++ b/builds/msvc/vs2012/libsodium/libsodium.vcxproj.filters @@ -333,9 +333,15 @@ crypto_aead\aegis256\armcrypto + + crypto_aead\aes256gcm + crypto_aead\aes256gcm\aesni + + crypto_aead\aes256gcm\armcrypto + crypto_aead\chacha20poly1305\sodium @@ -802,6 +808,9 @@ {7eb51140-a50f-3f50-b379-83677a82496c} + + {507bb9aa-b65c-3034-946b-bcd375b7deaa} + {1f4d6dd1-517f-3eeb-b974-2304ada5e67a} diff --git a/builds/msvc/vs2013/libsodium/libsodium.vcxproj b/builds/msvc/vs2013/libsodium/libsodium.vcxproj index d4d521a7..a1ab3e78 100644 --- a/builds/msvc/vs2013/libsodium/libsodium.vcxproj +++ b/builds/msvc/vs2013/libsodium/libsodium.vcxproj @@ -180,7 +180,9 @@ + + diff --git a/builds/msvc/vs2013/libsodium/libsodium.vcxproj.filters b/builds/msvc/vs2013/libsodium/libsodium.vcxproj.filters index 363a53bc..cd37679d 100644 --- a/builds/msvc/vs2013/libsodium/libsodium.vcxproj.filters +++ b/builds/msvc/vs2013/libsodium/libsodium.vcxproj.filters @@ -333,9 +333,15 @@ crypto_aead\aegis256\armcrypto + + crypto_aead\aes256gcm + crypto_aead\aes256gcm\aesni + + crypto_aead\aes256gcm\armcrypto + crypto_aead\chacha20poly1305\sodium @@ -802,6 +808,9 @@ {7eb51140-a50f-3f50-b379-83677a82496c} + + {507bb9aa-b65c-3034-946b-bcd375b7deaa} + {1f4d6dd1-517f-3eeb-b974-2304ada5e67a} diff --git a/builds/msvc/vs2015/libsodium/libsodium.vcxproj b/builds/msvc/vs2015/libsodium/libsodium.vcxproj index ab07ff28..3cd94db3 100644 --- a/builds/msvc/vs2015/libsodium/libsodium.vcxproj +++ b/builds/msvc/vs2015/libsodium/libsodium.vcxproj @@ -180,7 +180,9 @@ + + diff --git a/builds/msvc/vs2015/libsodium/libsodium.vcxproj.filters b/builds/msvc/vs2015/libsodium/libsodium.vcxproj.filters index 363a53bc..cd37679d 100644 --- a/builds/msvc/vs2015/libsodium/libsodium.vcxproj.filters +++ b/builds/msvc/vs2015/libsodium/libsodium.vcxproj.filters @@ -333,9 +333,15 @@ crypto_aead\aegis256\armcrypto + + crypto_aead\aes256gcm + crypto_aead\aes256gcm\aesni + + crypto_aead\aes256gcm\armcrypto + crypto_aead\chacha20poly1305\sodium @@ -802,6 +808,9 @@ {7eb51140-a50f-3f50-b379-83677a82496c} + + {507bb9aa-b65c-3034-946b-bcd375b7deaa} + {1f4d6dd1-517f-3eeb-b974-2304ada5e67a} diff --git 
a/builds/msvc/vs2017/libsodium/libsodium.vcxproj b/builds/msvc/vs2017/libsodium/libsodium.vcxproj index 0de45ab3..6f173664 100644 --- a/builds/msvc/vs2017/libsodium/libsodium.vcxproj +++ b/builds/msvc/vs2017/libsodium/libsodium.vcxproj @@ -180,7 +180,9 @@ + + diff --git a/builds/msvc/vs2017/libsodium/libsodium.vcxproj.filters b/builds/msvc/vs2017/libsodium/libsodium.vcxproj.filters index 363a53bc..cd37679d 100644 --- a/builds/msvc/vs2017/libsodium/libsodium.vcxproj.filters +++ b/builds/msvc/vs2017/libsodium/libsodium.vcxproj.filters @@ -333,9 +333,15 @@ crypto_aead\aegis256\armcrypto + + crypto_aead\aes256gcm + crypto_aead\aes256gcm\aesni + + crypto_aead\aes256gcm\armcrypto + crypto_aead\chacha20poly1305\sodium @@ -802,6 +808,9 @@ {7eb51140-a50f-3f50-b379-83677a82496c} + + {507bb9aa-b65c-3034-946b-bcd375b7deaa} + {1f4d6dd1-517f-3eeb-b974-2304ada5e67a} diff --git a/builds/msvc/vs2019/libsodium/libsodium.vcxproj b/builds/msvc/vs2019/libsodium/libsodium.vcxproj index 519d448e..14bcf8db 100644 --- a/builds/msvc/vs2019/libsodium/libsodium.vcxproj +++ b/builds/msvc/vs2019/libsodium/libsodium.vcxproj @@ -204,7 +204,9 @@ + + diff --git a/builds/msvc/vs2019/libsodium/libsodium.vcxproj.filters b/builds/msvc/vs2019/libsodium/libsodium.vcxproj.filters index 363a53bc..cd37679d 100644 --- a/builds/msvc/vs2019/libsodium/libsodium.vcxproj.filters +++ b/builds/msvc/vs2019/libsodium/libsodium.vcxproj.filters @@ -333,9 +333,15 @@ crypto_aead\aegis256\armcrypto + + crypto_aead\aes256gcm + crypto_aead\aes256gcm\aesni + + crypto_aead\aes256gcm\armcrypto + crypto_aead\chacha20poly1305\sodium @@ -802,6 +808,9 @@ {7eb51140-a50f-3f50-b379-83677a82496c} + + {507bb9aa-b65c-3034-946b-bcd375b7deaa} + {1f4d6dd1-517f-3eeb-b974-2304ada5e67a} diff --git a/builds/msvc/vs2022/libsodium/libsodium.vcxproj b/builds/msvc/vs2022/libsodium/libsodium.vcxproj index 2aedf72c..5f0a777d 100644 --- a/builds/msvc/vs2022/libsodium/libsodium.vcxproj +++ b/builds/msvc/vs2022/libsodium/libsodium.vcxproj @@ -133,7 +133,9 @@ + + diff --git a/builds/msvc/vs2022/libsodium/libsodium.vcxproj.filters b/builds/msvc/vs2022/libsodium/libsodium.vcxproj.filters index 363a53bc..cd37679d 100644 --- a/builds/msvc/vs2022/libsodium/libsodium.vcxproj.filters +++ b/builds/msvc/vs2022/libsodium/libsodium.vcxproj.filters @@ -333,9 +333,15 @@ crypto_aead\aegis256\armcrypto + + crypto_aead\aes256gcm + crypto_aead\aes256gcm\aesni + + crypto_aead\aes256gcm\armcrypto + crypto_aead\chacha20poly1305\sodium @@ -802,6 +808,9 @@ {7eb51140-a50f-3f50-b379-83677a82496c} + + {507bb9aa-b65c-3034-946b-bcd375b7deaa} + {1f4d6dd1-517f-3eeb-b974-2304ada5e67a} diff --git a/libsodium.vcxproj b/libsodium.vcxproj index 500f518a..c82f5cbf 100644 --- a/libsodium.vcxproj +++ b/libsodium.vcxproj @@ -418,7 +418,9 @@ + + diff --git a/libsodium.vcxproj.filters b/libsodium.vcxproj.filters index 42bef65f..7c72d364 100644 --- a/libsodium.vcxproj.filters +++ b/libsodium.vcxproj.filters @@ -324,9 +324,15 @@ Source Files + + Source Files + Source Files + + Source Files + Source Files diff --git a/src/libsodium/Makefile.am b/src/libsodium/Makefile.am index f6e67f2d..59c6e814 100644 --- a/src/libsodium/Makefile.am +++ b/src/libsodium/Makefile.am @@ -4,6 +4,7 @@ lib_LTLIBRARIES = \ libsodium_la_SOURCES = \ crypto_aead/aegis128l/aead_aegis128l.c \ crypto_aead/aegis256/aead_aegis256.c \ + crypto_aead/aes256gcm/aead_aes256gcm.c \ crypto_aead/chacha20poly1305/sodium/aead_chacha20poly1305.c \ crypto_aead/xchacha20poly1305/sodium/aead_xchacha20poly1305.c \ crypto_auth/crypto_auth.c \ @@ -223,7 +224,8 @@ 
libarmcrypto_la_CPPFLAGS = $(libsodium_la_CPPFLAGS) \ @CFLAGS_ARMCRYPTO@ libarmcrypto_la_SOURCES = \ crypto_aead/aegis128l/armcrypto/aead_aegis128l_armcrypto.c \ - crypto_aead/aegis256/armcrypto/aead_aegis256_armcrypto.c + crypto_aead/aegis256/armcrypto/aead_aegis256_armcrypto.c \ + crypto_aead/aes256gcm/armcrypto/aead_aes256gcm_armcrypto.c libaesni_la_LDFLAGS = $(libsodium_la_LDFLAGS) libaesni_la_CPPFLAGS = $(libsodium_la_CPPFLAGS) \ diff --git a/src/libsodium/crypto_aead/aes256gcm/aead_aes256gcm.c b/src/libsodium/crypto_aead/aes256gcm/aead_aes256gcm.c new file mode 100644 index 00000000..9db14e86 --- /dev/null +++ b/src/libsodium/crypto_aead/aes256gcm/aead_aes256gcm.c @@ -0,0 +1,157 @@ +#include +#include + +#include "crypto_aead_aes256gcm.h" +#include "private/common.h" +#include "randombytes.h" + +size_t +crypto_aead_aes256gcm_keybytes(void) +{ + return crypto_aead_aes256gcm_KEYBYTES; +} + +size_t +crypto_aead_aes256gcm_nsecbytes(void) +{ + return crypto_aead_aes256gcm_NSECBYTES; +} + +size_t +crypto_aead_aes256gcm_npubbytes(void) +{ + return crypto_aead_aes256gcm_NPUBBYTES; +} + +size_t +crypto_aead_aes256gcm_abytes(void) +{ + return crypto_aead_aes256gcm_ABYTES; +} + +size_t +crypto_aead_aes256gcm_statebytes(void) +{ + return (sizeof(crypto_aead_aes256gcm_state) + (size_t) 15U) & ~(size_t) 15U; +} + +size_t +crypto_aead_aes256gcm_messagebytes_max(void) +{ + return crypto_aead_aes256gcm_MESSAGEBYTES_MAX; +} + +void +crypto_aead_aes256gcm_keygen(unsigned char k[crypto_aead_aes256gcm_KEYBYTES]) +{ + randombytes_buf(k, crypto_aead_aes256gcm_KEYBYTES); +} + +#if !((defined(HAVE_ARMCRYPTO) && defined(__GNUC__)) || \ + (defined(HAVE_TMMINTRIN_H) && defined(HAVE_WMMINTRIN_H))) + +#ifndef ENOSYS +#define ENOSYS ENXIO +#endif + +int +crypto_aead_aes256gcm_encrypt_detached(unsigned char *c, unsigned char *mac, + unsigned long long *maclen_p, const unsigned char *m, + unsigned long long mlen, const unsigned char *ad, + unsigned long long adlen, const unsigned char *nsec, + const unsigned char *npub, const unsigned char *k) +{ + errno = ENOSYS; + return -1; +} + +int +crypto_aead_aes256gcm_encrypt(unsigned char *c, unsigned long long *clen_p, const unsigned char *m, + unsigned long long mlen, const unsigned char *ad, + unsigned long long adlen, const unsigned char *nsec, + const unsigned char *npub, const unsigned char *k) +{ + errno = ENOSYS; + return -1; +} + +int +crypto_aead_aes256gcm_decrypt_detached(unsigned char *m, unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *mac, const unsigned char *ad, + unsigned long long adlen, const unsigned char *npub, + const unsigned char *k) +{ + errno = ENOSYS; + return -1; +} + +int +crypto_aead_aes256gcm_decrypt(unsigned char *m, unsigned long long *mlen_p, unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k) +{ + errno = ENOSYS; + return -1; +} + +int +crypto_aead_aes256gcm_beforenm(crypto_aead_aes256gcm_state *st_, const unsigned char *k) +{ + errno = ENOSYS; + return -1; +} + +int +crypto_aead_aes256gcm_encrypt_detached_afternm(unsigned char *c, unsigned char *mac, + unsigned long long *maclen_p, const unsigned char *m, + unsigned long long mlen, const unsigned char *ad, + unsigned long long adlen, const unsigned char *nsec, + const unsigned char *npub, + const crypto_aead_aes256gcm_state *st_) +{ + errno = ENOSYS; + return -1; +} + +int +crypto_aead_aes256gcm_encrypt_afternm(unsigned 
char *c, unsigned long long *clen_p, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const crypto_aead_aes256gcm_state *st_) +{ + errno = ENOSYS; + return -1; +} + +int +crypto_aead_aes256gcm_decrypt_detached_afternm(unsigned char *m, unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *mac, const unsigned char *ad, + unsigned long long adlen, const unsigned char *npub, + const crypto_aead_aes256gcm_state *st_) +{ + errno = ENOSYS; + return -1; +} + +int +crypto_aead_aes256gcm_decrypt_afternm(unsigned char *m, unsigned long long *mlen_p, + unsigned char *nsec, const unsigned char *c, + unsigned long long clen, const unsigned char *ad, + unsigned long long adlen, const unsigned char *npub, + const crypto_aead_aes256gcm_state *st_) +{ + errno = ENOSYS; + return -1; +} + +int +crypto_aead_aes256gcm_is_available(void) +{ + return 0; +} + +#endif \ No newline at end of file diff --git a/src/libsodium/crypto_aead/aes256gcm/aesni/aead_aes256gcm_aesni.c b/src/libsodium/crypto_aead/aes256gcm/aesni/aead_aes256gcm_aesni.c index 9583fd7a..5e3e8294 100644 --- a/src/libsodium/crypto_aead/aes256gcm/aesni/aead_aes256gcm_aesni.c +++ b/src/libsodium/crypto_aead/aes256gcm/aesni/aead_aes256gcm_aesni.c @@ -1,10 +1,3 @@ - -/* - * AES256-GCM, based on the "Intel Carry-Less Multiplication Instruction and its - * Usage for Computing the GCM Mode" paper and reference code, using the - * aggregated reduction method. Originally adapted by Romain Dolbeau. - */ - #include #include #include @@ -22,7 +15,6 @@ #if defined(HAVE_TMMINTRIN_H) && defined(HAVE_WMMINTRIN_H) -#if defined(__x86_64__) || defined(__i386__) #ifdef __GNUC__ #pragma GCC target("ssse3") #pragma GCC target("aes") @@ -30,17 +22,11 @@ #endif #include #include -#elif defined(__aarch64__) -#include -#else -#error Unsupported architecture -#endif #define ABYTES crypto_aead_aes256gcm_ABYTES #define NPUBBYTES crypto_aead_aes256gcm_NPUBBYTES #define KEYBYTES crypto_aead_aes256gcm_KEYBYTES -#ifdef __x86_64__ #define PARALLEL_BLOCKS 7 #undef USE_KARATSUBA_MULTIPLICATION @@ -74,66 +60,6 @@ typedef __m128i BlockVec; #define PREFETCH_READ(x) __builtin_prefetch((x), 0, 2) #define PREFETCH_WRITE(x) __builtin_prefetch((x), 1, 2); -#elif defined(HAVE_ARMCRYPTO) - -#define PARALLEL_BLOCKS 6 -#undef USE_KARATSUBA_MULTIPLICATION - -typedef uint64x2_t BlockVec; - -#define LOAD128(a) vld1q_u64((const void *) a) -#define STORE128(a, b) vst1q_u64(((void *) a), (b)) -#define AES_ENCRYPT(block_vec, rkey) \ - vreinterpretq_u64_u8( \ - veorq_u8(vaesmcq_u8(vaeseq_u8(vreinterpretq_u8_u64(block_vec), vmovq_n_u8(0))), rkey)) -#define AES_ENCRYPTLAST(block_vec, rkey) \ - vreinterpretq_u64_u8(veorq_u8(vaeseq_u8(vreinterpretq_u8_u64(block_vec), vmovq_n_u8(0)), rkey)) -#define XOR128(a, b) veorq_u64((a), (b)) -#define AND128(a, b) vandq_u64((a), (b)) -#define OR128(a, b) vorrq_u64((a), (b)) -#define SET64x2(a, b) vsetq_lane_u64((uint64_t) (a), vmovq_n_u64((uint64_t) (b)), 1) -#define ZERO128 vmovq_n_u8(0) -#define ONE128 SET64x2(0, 1) -#define ADD64x2(a, b) vaddq_u64((a), (b)) -#define SUB64x2(a, b) vsubq_u64((a), (b)) -#define SHL64x2(a, b) vshlq_n_u64((a), (b)) -#define SHR64x2(a, b) vshrq_n_u64((a), (b)) -#define REV128(x) \ - vreinterpretq_u64_u8(__builtin_shufflevector(vreinterpretq_u8_u64(x), vreinterpretq_u8_u64(x), \ - 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, \ - 1, 0)) -#define SHUFFLE32x4(x, a, b, c, d) \ - 
vreinterpretq_u64_u32(__builtin_shufflevector(vreinterpretq_u32_u64(x), \ - vreinterpretq_u32_u64(x), (a), (b), (c), (d))) -#define BYTESHL128(a, b) vreinterpretq_u64_u8(vextq_s8(vdupq_n_s8(0), (int8x16_t) a, 16 - (b))) -#define BYTESHR128(a, b) vreinterpretq_u64_u8(vextq_s8((int8x16_t) a, vdupq_n_s8(0), (b))) - -#define SHL128(a, b) OR128(SHL64x2((a), (b)), SHR64x2(BYTESHL128((a), 8), 64 - (b))) -#define CLMULLO128(a, b) \ - vreinterpretq_u64_p128(vmull_p64((poly64_t) vget_low_u64(a), (poly64_t) vget_low_u64(b))) -#define CLMULHI128(a, b) \ - vreinterpretq_u64_p128(vmull_high_p64(vreinterpretq_p64_s64(a), vreinterpretq_p64_s64(b))) -#define CLMULLOHI128(a, b) \ - vreinterpretq_u64_p128(vmull_p64((poly64_t) vget_low_u64(a), (poly64_t) vget_high_u64(b))) -#define CLMULHILO128(a, b) \ - vreinterpretq_u64_p128(vmull_p64((poly64_t) vget_high_u64(a), (poly64_t) vget_low_u64(b))) -#define PREFETCH_READ(x) __builtin_prefetch((x), 0, 2) -#define PREFETCH_WRITE(x) __builtin_prefetch((x), 1, 2); - -static inline BlockVec -AES_KEYGEN(BlockVec block_vec, const int rc) -{ - uint8x16_t a = vaeseq_u8(vreinterpretq_u8_u64(block_vec), vmovq_n_u8(0)); - const uint8x16_t b = - __builtin_shufflevector(a, a, 4, 1, 14, 11, 1, 14, 11, 4, 12, 9, 6, 3, 9, 6, 3, 12); - const uint64x2_t c = SET64x2((uint64_t) rc << 32, (uint64_t) rc << 32); - return XOR128(b, c); -} - -#else -#error Unsupported architecture -#endif - #define ROUNDS 14 #define PC_COUNT (2 * PARALLEL_BLOCKS) @@ -1095,152 +1021,4 @@ crypto_aead_aes256gcm_is_available(void) return sodium_runtime_has_pclmul() & sodium_runtime_has_aesni(); } -#else - -#ifndef ENOSYS -#define ENOSYS ENXIO #endif - -int -crypto_aead_aes256gcm_encrypt_detached(unsigned char *c, unsigned char *mac, - unsigned long long *maclen_p, const unsigned char *m, - unsigned long long mlen, const unsigned char *ad, - unsigned long long adlen, const unsigned char *nsec, - const unsigned char *npub, const unsigned char *k) -{ - errno = ENOSYS; - return -1; -} - -int -crypto_aead_aes256gcm_encrypt(unsigned char *c, unsigned long long *clen_p, const unsigned char *m, - unsigned long long mlen, const unsigned char *ad, - unsigned long long adlen, const unsigned char *nsec, - const unsigned char *npub, const unsigned char *k) -{ - errno = ENOSYS; - return -1; -} - -int -crypto_aead_aes256gcm_decrypt_detached(unsigned char *m, unsigned char *nsec, - const unsigned char *c, unsigned long long clen, - const unsigned char *mac, const unsigned char *ad, - unsigned long long adlen, const unsigned char *npub, - const unsigned char *k) -{ - errno = ENOSYS; - return -1; -} - -int -crypto_aead_aes256gcm_decrypt(unsigned char *m, unsigned long long *mlen_p, unsigned char *nsec, - const unsigned char *c, unsigned long long clen, - const unsigned char *ad, unsigned long long adlen, - const unsigned char *npub, const unsigned char *k) -{ - errno = ENOSYS; - return -1; -} - -int -crypto_aead_aes256gcm_beforenm(crypto_aead_aes256gcm_state *st_, const unsigned char *k) -{ - errno = ENOSYS; - return -1; -} - -int -crypto_aead_aes256gcm_encrypt_detached_afternm(unsigned char *c, unsigned char *mac, - unsigned long long *maclen_p, const unsigned char *m, - unsigned long long mlen, const unsigned char *ad, - unsigned long long adlen, const unsigned char *nsec, - const unsigned char *npub, - const crypto_aead_aes256gcm_state *st_) -{ - errno = ENOSYS; - return -1; -} - -int -crypto_aead_aes256gcm_encrypt_afternm(unsigned char *c, unsigned long long *clen_p, - const unsigned char *m, unsigned long long mlen, - 
const unsigned char *ad, unsigned long long adlen, - const unsigned char *nsec, const unsigned char *npub, - const crypto_aead_aes256gcm_state *st_) -{ - errno = ENOSYS; - return -1; -} - -int -crypto_aead_aes256gcm_decrypt_detached_afternm(unsigned char *m, unsigned char *nsec, - const unsigned char *c, unsigned long long clen, - const unsigned char *mac, const unsigned char *ad, - unsigned long long adlen, const unsigned char *npub, - const crypto_aead_aes256gcm_state *st_) -{ - errno = ENOSYS; - return -1; -} - -int -crypto_aead_aes256gcm_decrypt_afternm(unsigned char *m, unsigned long long *mlen_p, - unsigned char *nsec, const unsigned char *c, - unsigned long long clen, const unsigned char *ad, - unsigned long long adlen, const unsigned char *npub, - const crypto_aead_aes256gcm_state *st_) -{ - errno = ENOSYS; - return -1; -} - -int -crypto_aead_aes256gcm_is_available(void) -{ - return 0; -} - -#endif - -size_t -crypto_aead_aes256gcm_keybytes(void) -{ - return crypto_aead_aes256gcm_KEYBYTES; -} - -size_t -crypto_aead_aes256gcm_nsecbytes(void) -{ - return crypto_aead_aes256gcm_NSECBYTES; -} - -size_t -crypto_aead_aes256gcm_npubbytes(void) -{ - return crypto_aead_aes256gcm_NPUBBYTES; -} - -size_t -crypto_aead_aes256gcm_abytes(void) -{ - return crypto_aead_aes256gcm_ABYTES; -} - -size_t -crypto_aead_aes256gcm_statebytes(void) -{ - return (sizeof(crypto_aead_aes256gcm_state) + (size_t) 15U) & ~(size_t) 15U; -} - -size_t -crypto_aead_aes256gcm_messagebytes_max(void) -{ - return crypto_aead_aes256gcm_MESSAGEBYTES_MAX; -} - -void -crypto_aead_aes256gcm_keygen(unsigned char k[crypto_aead_aes256gcm_KEYBYTES]) -{ - randombytes_buf(k, crypto_aead_aes256gcm_KEYBYTES); -} diff --git a/src/libsodium/crypto_aead/aes256gcm/armcrypto/aead_aes256gcm_armcrypto.c b/src/libsodium/crypto_aead/aes256gcm/armcrypto/aead_aes256gcm_armcrypto.c new file mode 100644 index 00000000..e736ecda --- /dev/null +++ b/src/libsodium/crypto_aead/aes256gcm/armcrypto/aead_aes256gcm_armcrypto.c @@ -0,0 +1,1039 @@ +#include +#include +#include +#include +#include + +#include "core.h" +#include "crypto_aead_aes256gcm.h" +#include "crypto_verify_16.h" +#include "export.h" +#include "private/common.h" +#include "randombytes.h" +#include "runtime.h" +#include "utils.h" + +#if defined(HAVE_ARMCRYPTO) && defined(__GNUC__) + +#include + +#define ABYTES crypto_aead_aes256gcm_ABYTES +#define NPUBBYTES crypto_aead_aes256gcm_NPUBBYTES +#define KEYBYTES crypto_aead_aes256gcm_KEYBYTES + +#define PARALLEL_BLOCKS 6 +#undef USE_KARATSUBA_MULTIPLICATION + +typedef uint64x2_t BlockVec; + +#define LOAD128(a) vld1q_u64((const void *) a) +#define STORE128(a, b) vst1q_u64(((void *) a), (b)) +#define AES_ENCRYPT(block_vec, rkey) \ + vreinterpretq_u64_u8( \ + veorq_u8(vaesmcq_u8(vaeseq_u8(vreinterpretq_u8_u64(block_vec), vmovq_n_u8(0))), rkey)) +#define AES_ENCRYPTLAST(block_vec, rkey) \ + vreinterpretq_u64_u8(veorq_u8(vaeseq_u8(vreinterpretq_u8_u64(block_vec), vmovq_n_u8(0)), rkey)) +#define XOR128(a, b) veorq_u64((a), (b)) +#define AND128(a, b) vandq_u64((a), (b)) +#define OR128(a, b) vorrq_u64((a), (b)) +#define SET64x2(a, b) vsetq_lane_u64((uint64_t) (a), vmovq_n_u64((uint64_t) (b)), 1) +#define ZERO128 vmovq_n_u8(0) +#define ONE128 SET64x2(0, 1) +#define ADD64x2(a, b) vaddq_u64((a), (b)) +#define SUB64x2(a, b) vsubq_u64((a), (b)) +#define SHL64x2(a, b) vshlq_n_u64((a), (b)) +#define SHR64x2(a, b) vshrq_n_u64((a), (b)) +#define REV128(x) \ + vreinterpretq_u64_u8(__builtin_shufflevector(vreinterpretq_u8_u64(x), vreinterpretq_u8_u64(x), \ + 15, 14, 
13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, \ + 1, 0)) +#define SHUFFLE32x4(x, a, b, c, d) \ + vreinterpretq_u64_u32(__builtin_shufflevector(vreinterpretq_u32_u64(x), \ + vreinterpretq_u32_u64(x), (a), (b), (c), (d))) +#define BYTESHL128(a, b) vreinterpretq_u64_u8(vextq_s8(vdupq_n_s8(0), (int8x16_t) a, 16 - (b))) +#define BYTESHR128(a, b) vreinterpretq_u64_u8(vextq_s8((int8x16_t) a, vdupq_n_s8(0), (b))) + +#define SHL128(a, b) OR128(SHL64x2((a), (b)), SHR64x2(BYTESHL128((a), 8), 64 - (b))) +#define CLMULLO128(a, b) \ + vreinterpretq_u64_p128(vmull_p64((poly64_t) vget_low_u64(a), (poly64_t) vget_low_u64(b))) +#define CLMULHI128(a, b) \ + vreinterpretq_u64_p128(vmull_high_p64(vreinterpretq_p64_s64(a), vreinterpretq_p64_s64(b))) +#define CLMULLOHI128(a, b) \ + vreinterpretq_u64_p128(vmull_p64((poly64_t) vget_low_u64(a), (poly64_t) vget_high_u64(b))) +#define CLMULHILO128(a, b) \ + vreinterpretq_u64_p128(vmull_p64((poly64_t) vget_high_u64(a), (poly64_t) vget_low_u64(b))) +#define PREFETCH_READ(x) __builtin_prefetch((x), 0, 2) +#define PREFETCH_WRITE(x) __builtin_prefetch((x), 1, 2); + +static inline BlockVec +AES_KEYGEN(BlockVec block_vec, const int rc) +{ + uint8x16_t a = vaeseq_u8(vreinterpretq_u8_u64(block_vec), vmovq_n_u8(0)); + const uint8x16_t b = + __builtin_shufflevector(a, a, 4, 1, 14, 11, 1, 14, 11, 4, 12, 9, 6, 3, 9, 6, 3, 12); + const uint64x2_t c = SET64x2((uint64_t) rc << 32, (uint64_t) rc << 32); + return XOR128(b, c); +} + +#define ROUNDS 14 + +#define PC_COUNT (2 * PARALLEL_BLOCKS) + +typedef struct I256 { + BlockVec hi; + BlockVec lo; + BlockVec mid; +} I256; + +static inline I256 +XOR256(const I256 a, const I256 b) +{ + return (I256) { + .hi = XOR128(a.hi, b.hi), + .lo = XOR128(a.lo, b.lo), + .mid = XOR128(a.mid, b.mid), + }; +} + +typedef BlockVec Precomp; + +typedef struct GHash { + BlockVec acc; +} GHash; + +typedef struct State { + BlockVec rkeys[ROUNDS + 1]; + Precomp hx[PC_COUNT]; +} State; + +static void +expand256(const unsigned char key[KEYBYTES], BlockVec rkeys[1 + ROUNDS]) +{ + BlockVec t1, t2, s; + size_t i = 0; + +#define EXPAND_KEY_1(RC) \ + rkeys[i++] = t2; \ + s = AES_KEYGEN(t2, RC); \ + t1 = XOR128(t1, BYTESHL128(t1, 4)); \ + t1 = XOR128(t1, BYTESHL128(t1, 8)); \ + t1 = XOR128(t1, SHUFFLE32x4(s, 3, 3, 3, 3)); + +#define EXPAND_KEY_2(RC) \ + rkeys[i++] = t1; \ + s = AES_KEYGEN(t1, RC); \ + t2 = XOR128(t2, BYTESHL128(t2, 4)); \ + t2 = XOR128(t2, BYTESHL128(t2, 8)); \ + t2 = XOR128(t2, SHUFFLE32x4(s, 2, 2, 2, 2)); + + t1 = LOAD128(&key[0]); + t2 = LOAD128(&key[16]); + + rkeys[i++] = t1; + EXPAND_KEY_1(0x01); + EXPAND_KEY_2(0x01); + EXPAND_KEY_1(0x02); + EXPAND_KEY_2(0x02); + EXPAND_KEY_1(0x04); + EXPAND_KEY_2(0x04); + EXPAND_KEY_1(0x08); + EXPAND_KEY_2(0x08); + EXPAND_KEY_1(0x10); + EXPAND_KEY_2(0x10); + EXPAND_KEY_1(0x20); + EXPAND_KEY_2(0x20); + EXPAND_KEY_1(0x40); + rkeys[i++] = t1; +} + +/* Encrypt a single AES block */ + +static inline void +encrypt(const State *st, unsigned char dst[16], const unsigned char src[16]) +{ + BlockVec t; + + size_t i; + + t = XOR128(LOAD128(src), st->rkeys[0]); + for (i = 1; i < ROUNDS; i++) { + t = AES_ENCRYPT(t, st->rkeys[i]); + } + t = AES_ENCRYPTLAST(t, st->rkeys[ROUNDS]); + STORE128(dst, t); +} + +/* Encrypt and add a single AES block */ + +static inline void +encrypt_xor_block(const State *st, unsigned char dst[16], const unsigned char src[16], + const BlockVec counter) +{ + BlockVec ts; + size_t i; + + ts = XOR128(counter, st->rkeys[0]); + for (i = 1; i < ROUNDS; i++) { + ts = AES_ENCRYPT(ts, st->rkeys[i]); + } + ts = 
AES_ENCRYPTLAST(ts, st->rkeys[i]); + ts = XOR128(ts, LOAD128(src)); + STORE128(dst, ts); +} + +/* Encrypt and add PARALLEL_BLOCKS AES blocks */ + +static inline void +encrypt_xor_wide(const State *st, unsigned char dst[16 * PARALLEL_BLOCKS], + const unsigned char src[16 * PARALLEL_BLOCKS], + const BlockVec counters[PARALLEL_BLOCKS]) +{ + BlockVec ts[PARALLEL_BLOCKS]; + size_t i, j; + + for (j = 0; j < PARALLEL_BLOCKS; j++) { + ts[j] = XOR128(counters[j], st->rkeys[0]); + } + for (i = 1; i < ROUNDS; i++) { + for (j = 0; j < PARALLEL_BLOCKS; j++) { + ts[j] = AES_ENCRYPT(ts[j], st->rkeys[i]); + } + } + for (j = 0; j < PARALLEL_BLOCKS; j++) { + ts[j] = AES_ENCRYPTLAST(ts[j], st->rkeys[i]); + ts[j] = XOR128(ts[j], LOAD128(&src[16 * j])); + } + for (j = 0; j < PARALLEL_BLOCKS; j++) { + STORE128(&dst[16 * j], ts[j]); + } +} + +/* Square a field element */ + +static inline I256 +clsq128(const BlockVec x) +{ + const BlockVec x_hi = BYTESHR128(x, 8); + const BlockVec mid = XOR128(x, x_hi); + const BlockVec r_lo = CLMULLO128(x, x); + const BlockVec r_hi = CLMULHI128(x, x); + const BlockVec r_mid = XOR128(CLMULLO128(mid, mid), XOR128(r_lo, r_hi)); + + return (I256) { + .hi = r_hi, + .lo = r_lo, + .mid = r_mid, + }; +} + +/* Multiply two field elements -- Textbook multiplication is faster than Karatsuba on some recent + * CPUs */ + +static inline I256 +clmul128(const BlockVec x, const BlockVec y) +{ +#ifdef USE_KARATSUBA_MULTIPLICATION + const BlockVec x_hi = BYTESHR128(x, 8); + const BlockVec y_hi = BYTESHR128(y, 8); + const BlockVec r_lo = CLMULLO128(x, y); + const BlockVec r_hi = CLMULHI128(x, y); + const BlockVec r_mid = XOR128(CLMULLO128(XOR128(x, x_hi), XOR128(y, y_hi)), XOR128(r_lo, r_hi)); + + return (I256) { + .hi = r_hi, + .lo = r_lo, + .mid = r_mid, + }; +#else + const BlockVec r_hi = CLMULHI128(x, y); + const BlockVec r_lo = CLMULLO128(x, y); + const BlockVec r_mid = XOR128(CLMULHILO128(x, y), CLMULLOHI128(x, y)); + + return (I256) { + .hi = r_hi, + .lo = r_lo, + .mid = r_mid, + }; +#endif +} + +/* Merge the middle word and reduce a field element */ + +static inline BlockVec +gcm_reduce(const I256 x) +{ + const BlockVec hi = XOR128(x.hi, BYTESHR128(x.mid, 8)); + const BlockVec lo = XOR128(x.lo, BYTESHL128(x.mid, 8)); + + const BlockVec p64 = SET64x2(0, 0xc200000000000000); + const BlockVec a = CLMULLO128(lo, p64); + const BlockVec b = XOR128(SHUFFLE32x4(lo, 2, 3, 0, 1), a); + const BlockVec c = CLMULLO128(b, p64); + const BlockVec d = XOR128(SHUFFLE32x4(b, 2, 3, 0, 1), c); + + return XOR128(d, hi); +} + +/* Precompute powers of H from `from` to `to` */ + +static inline void +precomp(Precomp hx[PC_COUNT], const size_t from, const size_t to) +{ + const Precomp h = hx[0]; + size_t i; + + for (i = from & ~1U; i < to; i += 2) { + hx[i] = gcm_reduce(clmul128(hx[i - 1], h)); + hx[i + 1] = gcm_reduce(clsq128(hx[i / 2])); + } +} + +/* Precompute powers of H given a key and a block count */ + +static void +precomp_for_block_count(Precomp hx[PC_COUNT], const unsigned char gh_key[16], + const size_t block_count) +{ + const BlockVec h0 = REV128(LOAD128(gh_key)); + BlockVec carry = SET64x2(0xc200000000000000, 1); + BlockVec mask = SUB64x2(ZERO128, SHR64x2(h0, 63)); + BlockVec h0_shifted; + BlockVec h; + + mask = SHUFFLE32x4(mask, 3, 3, 3, 3); + carry = AND128(carry, mask); + h0_shifted = SHL128(h0, 1); + h = XOR128(h0_shifted, carry); + + hx[0] = h; + hx[1] = gcm_reduce(clsq128(hx[0])); + + if (block_count >= PC_COUNT) { + precomp(hx, 2, PC_COUNT); + } else { + precomp(hx, 2, block_count); + } +} + +/* 
Initialize a GHash */ + +static inline void +gh_init(GHash *sth) +{ + sth->acc = ZERO128; +} + +/* Absorb ad_len bytes of associated data. There has to be no partial block. */ + +static inline void +gh_ad_blocks(const State *st, GHash *sth, const unsigned char *ad, size_t ad_len) +{ + size_t i; + + i = (size_t) 0U; + for (; i + PC_COUNT * 16 <= ad_len; i += PC_COUNT * 16) { + BlockVec m = REV128(LOAD128(ad + i)); + I256 u = clmul128(XOR128(sth->acc, m), st->hx[PC_COUNT - 1 - 0]); + size_t j; + + for (j = 1; j < PC_COUNT; j += 1) { + m = REV128(LOAD128(ad + i + j * 16)); + u = XOR256(u, clmul128(m, st->hx[PC_COUNT - 1 - j])); + } + sth->acc = gcm_reduce(u); + } + for (; i + PC_COUNT * 16 / 2 <= ad_len; i += PC_COUNT * 16 / 2) { + BlockVec m = REV128(LOAD128(ad + i)); + I256 u = clmul128(XOR128(sth->acc, m), st->hx[PC_COUNT / 2 - 1 - 0]); + size_t j; + + for (j = 1; j < PC_COUNT / 2; j += 1) { + m = REV128(LOAD128(ad + i + j * 16)); + u = XOR256(u, clmul128(m, st->hx[PC_COUNT / 2 - 1 - j])); + } + sth->acc = gcm_reduce(u); + } + for (; i + 4 * 16 <= ad_len; i += 4 * 16) { + BlockVec m = REV128(LOAD128(ad + i)); + I256 u = clmul128(XOR128(sth->acc, m), st->hx[4 - 1 - 0]); + size_t j; + + for (j = 1; j < 4; j += 1) { + m = REV128(LOAD128(ad + i + j * 16)); + u = XOR256(u, clmul128(m, st->hx[4 - 1 - j])); + } + sth->acc = gcm_reduce(u); + } + for (; i + 2 * 16 <= ad_len; i += 2 * 16) { + BlockVec m = REV128(LOAD128(ad + i)); + I256 u = clmul128(XOR128(sth->acc, m), st->hx[2 - 1 - 0]); + size_t j; + + for (j = 1; j < 2; j += 1) { + m = REV128(LOAD128(ad + i + j * 16)); + u = XOR256(u, clmul128(m, st->hx[2 - 1 - j])); + } + sth->acc = gcm_reduce(u); + } + if (i < ad_len) { + const size_t n = (ad_len - i) / 16; + BlockVec m = REV128(LOAD128(ad + i)); + I256 u = clmul128(XOR128(sth->acc, m), st->hx[n - 1 - 0]); + size_t j; + + for (j = 1; j < n; j += 1) { + m = REV128(LOAD128(ad + i + j * 16)); + u = XOR256(u, clmul128(m, st->hx[n - 1 - j])); + } + i += n * 16; + sth->acc = gcm_reduce(u); + } +} + +/* Increment counters */ + +static inline BlockVec +incr_counters(BlockVec rev_counters[], BlockVec counter, const size_t n) +{ + size_t i; + + const BlockVec one = ONE128; + for (i = 0; i < n; i++) { + rev_counters[i] = REV128(counter); + counter = ADD64x2(counter, one); + } + return counter; +} + +/* Compute the number of required blocks to encrypt and authenticate `ad_len` of associated data, + * and `m_len` of encrypted bytes. Return `0` if limits would be exceeded.*/ + +static inline size_t +required_blocks(const size_t ad_len, const size_t m_len) +{ + const size_t ad_blocks = (ad_len + 15) / 16; + const size_t m_blocks = (m_len + 15) / 16; + + if (ad_len > SIZE_MAX - 2 * PARALLEL_BLOCKS * 16 || + m_len > SIZE_MAX - 2 * PARALLEL_BLOCKS * 16 || ad_len < ad_blocks || m_len < m_blocks || + m_blocks >= (1ULL << 32) - 2) { + return 0; + } + return ad_blocks + m_blocks + 1; +} + +/* Generic AES-GCM encryption. 
"Generic" as it can handle arbitrary input sizes, +unlike a length-limited version that would precompute all the required powers of H */ + +static void +aes_gcm_encrypt_generic(const State *st, GHash *sth, unsigned char mac[ABYTES], unsigned char *dst, + const unsigned char *src, size_t src_len, const unsigned char *ad, + size_t ad_len, unsigned char counter_[16]) +{ + CRYPTO_ALIGN(32) I256 u; + CRYPTO_ALIGN(16) unsigned char last_blocks[2 * 16]; + const BlockVec one = ONE128; + BlockVec final_block; + BlockVec rev_counters[PARALLEL_BLOCKS]; + BlockVec m; + BlockVec counter; + size_t i; + size_t j; + size_t left; + size_t pi; + + COMPILER_ASSERT(PC_COUNT % PARALLEL_BLOCKS == 0); + + /* Associated data */ + + if (ad != NULL && ad_len != 0) { + gh_ad_blocks(st, sth, ad, ad_len & ~15); + left = ad_len & 15; + if (left != 0) { + unsigned char pad[16]; + + memset(pad, 0, sizeof pad); + memcpy(pad, ad + ad_len - left, left); + gh_ad_blocks(st, sth, pad, sizeof pad); + } + } + + /* Encrypted data */ + + counter = REV128(LOAD128(counter_)); + i = 0; + + /* 2*PARALLEL_BLOCKS aggregation */ + + if (src_len - i >= 2 * PARALLEL_BLOCKS * 16) { + counter = incr_counters(rev_counters, counter, PARALLEL_BLOCKS); + encrypt_xor_wide(st, dst + i, src + i, rev_counters); + i += PARALLEL_BLOCKS * 16; + + for (; i + 2 * PARALLEL_BLOCKS * 16 <= src_len; i += 2 * PARALLEL_BLOCKS * 16) { + counter = incr_counters(rev_counters, counter, PARALLEL_BLOCKS); + encrypt_xor_wide(st, dst + i, src + i, rev_counters); + + pi = i - PARALLEL_BLOCKS * 16; + m = REV128(LOAD128(dst + pi)); + u = clmul128(XOR128(sth->acc, m), st->hx[2 * PARALLEL_BLOCKS - 1 - 0]); + for (j = 1; j < PARALLEL_BLOCKS; j += 1) { + m = REV128(LOAD128(dst + pi + j * 16)); + u = XOR256(u, clmul128(m, st->hx[2 * PARALLEL_BLOCKS - 1 - j])); + } + + counter = incr_counters(rev_counters, counter, PARALLEL_BLOCKS); + encrypt_xor_wide(st, dst + i + PARALLEL_BLOCKS * 16, src + i + PARALLEL_BLOCKS * 16, + rev_counters); + + pi = i; + for (j = 0; j < PARALLEL_BLOCKS; j += 1) { + m = REV128(LOAD128(dst + pi + j * 16)); + u = XOR256(u, clmul128(m, st->hx[PARALLEL_BLOCKS - 1 - j])); + } + sth->acc = gcm_reduce(u); + } + + pi = i - PARALLEL_BLOCKS * 16; + m = REV128(LOAD128(dst + pi)); + u = clmul128(XOR128(sth->acc, m), st->hx[PARALLEL_BLOCKS - 1 - 0]); + for (j = 1; j < PARALLEL_BLOCKS; j += 1) { + m = REV128(LOAD128(dst + pi + j * 16)); + u = XOR256(u, clmul128(m, st->hx[PARALLEL_BLOCKS - 1 - j])); + } + sth->acc = gcm_reduce(u); + } + + /* PARALLEL_BLOCKS aggregation */ + + if (src_len - i >= PARALLEL_BLOCKS * 16) { + counter = incr_counters(rev_counters, counter, PARALLEL_BLOCKS); + encrypt_xor_wide(st, dst + i, src + i, rev_counters); + i += PARALLEL_BLOCKS * 16; + + for (; i + PARALLEL_BLOCKS * 16 <= src_len; i += PARALLEL_BLOCKS * 16) { + counter = incr_counters(rev_counters, counter, PARALLEL_BLOCKS); + encrypt_xor_wide(st, dst + i, src + i, rev_counters); + + pi = i - PARALLEL_BLOCKS * 16; + m = REV128(LOAD128(dst + pi)); + u = clmul128(XOR128(sth->acc, m), st->hx[PARALLEL_BLOCKS - 1 - 0]); + for (j = 1; j < PARALLEL_BLOCKS; j += 1) { + m = REV128(LOAD128(dst + pi + j * 16)); + u = XOR256(u, clmul128(m, st->hx[PARALLEL_BLOCKS - 1 - j])); + } + sth->acc = gcm_reduce(u); + } + + pi = i - PARALLEL_BLOCKS * 16; + m = REV128(LOAD128(dst + pi)); + u = clmul128(XOR128(sth->acc, m), st->hx[PARALLEL_BLOCKS - 1 - 0]); + for (j = 1; j < PARALLEL_BLOCKS; j += 1) { + m = REV128(LOAD128(dst + pi + j * 16)); + u = XOR256(u, clmul128(m, st->hx[PARALLEL_BLOCKS - 1 - j])); + 
} + sth->acc = gcm_reduce(u); + } + + /* 4-blocks aggregation */ + + for (; i + 4 * 16 <= src_len; i += 4 * 16) { + counter = incr_counters(rev_counters, counter, 4); + for (j = 0; j < 4; j++) { + encrypt_xor_block(st, dst + i + j * 16, src + i + j * 16, rev_counters[j]); + } + + m = REV128(LOAD128(dst + i)); + u = clmul128(XOR128(sth->acc, m), st->hx[4 - 1 - 0]); + for (j = 1; j < 4; j += 1) { + m = REV128(LOAD128(dst + i + j * 16)); + u = XOR256(u, clmul128(m, st->hx[4 - 1 - j])); + } + sth->acc = gcm_reduce(u); + } + + /* 2-blocks aggregation */ + + for (; i + 2 * 16 <= src_len; i += 2 * 16) { + counter = incr_counters(rev_counters, counter, 2); + for (j = 0; j < 2; j++) { + encrypt_xor_block(st, dst + i + j * 16, src + i + j * 16, rev_counters[j]); + } + + m = REV128(LOAD128(dst + i)); + u = clmul128(XOR128(sth->acc, m), st->hx[2 - 1 - 0]); + for (j = 1; j < 2; j += 1) { + m = REV128(LOAD128(dst + i + j * 16)); + u = XOR256(u, clmul128(m, st->hx[2 - 1 - j])); + } + sth->acc = gcm_reduce(u); + } + + /* Remaining *partial* blocks; if we have 16 bytes left, we want to keep the + full block authenticated along with the final block, hence < and not <= */ + + for (; i + 16 < src_len; i += 16) { + encrypt_xor_block(st, dst + i, src + i, REV128(counter)); + m = REV128(LOAD128(dst + i)); + u = clmul128(XOR128(sth->acc, m), st->hx[1 - 1 - 0]); + sth->acc = gcm_reduce(u); + counter = ADD64x2(counter, one); + } + + /* Authenticate both the last block of the message and the final block */ + + final_block = REV128(SET64x2(ad_len * 8, src_len * 8)); + STORE32_BE(counter_ + NPUBBYTES, 1); + encrypt(st, mac, counter_); + left = src_len - i; + if (left != 0) { + for (j = 0; j < left; j++) { + last_blocks[j] = src[i + j]; + } + STORE128(last_blocks + 16, final_block); + encrypt_xor_block(st, last_blocks, last_blocks, REV128(counter)); + for (; j < 16; j++) { + last_blocks[j] = 0; + } + for (j = 0; j < left; j++) { + dst[i + j] = last_blocks[j]; + } + gh_ad_blocks(st, sth, last_blocks, 32); + } else { + STORE128(last_blocks, final_block); + gh_ad_blocks(st, sth, last_blocks, 16); + } + STORE128(mac, XOR128(LOAD128(mac), REV128(sth->acc))); +} + +/* Generic AES-GCM decryption. 
"Generic" as it can handle arbitrary input sizes, +unlike a length-limited version that would precompute all the required powers of H */ + +static void +aes_gcm_decrypt_generic(const State *st, GHash *sth, unsigned char mac[ABYTES], unsigned char *dst, + const unsigned char *src, size_t src_len, const unsigned char *ad, + size_t ad_len, unsigned char counter_[16]) +{ + CRYPTO_ALIGN(32) I256 u; + CRYPTO_ALIGN(16) unsigned char last_blocks[2 * 16]; + const BlockVec one = ONE128; + BlockVec final_block; + BlockVec rev_counters[PARALLEL_BLOCKS]; + BlockVec m; + BlockVec counter; + size_t i; + size_t j; + size_t left; + + COMPILER_ASSERT(PC_COUNT % PARALLEL_BLOCKS == 0); + + /* Associated data */ + + if (ad != NULL && ad_len != 0) { + gh_ad_blocks(st, sth, ad, ad_len & ~15); + left = ad_len & 15; + if (left != 0) { + unsigned char pad[16]; + + memset(pad, 0, sizeof pad); + memcpy(pad, ad + ad_len - left, left); + gh_ad_blocks(st, sth, pad, sizeof pad); + } + } + + /* Encrypted data */ + + counter = REV128(LOAD128(counter_)); + i = 0; + + /* 2*PARALLEL_BLOCKS aggregation */ + + for (; i + 2 * PARALLEL_BLOCKS * 16 <= src_len; i += 2 * PARALLEL_BLOCKS * 16) { + counter = incr_counters(rev_counters, counter, PARALLEL_BLOCKS); + + m = REV128(LOAD128(src + i)); + u = clmul128(XOR128(sth->acc, m), st->hx[2 * PARALLEL_BLOCKS - 1 - 0]); + for (j = 1; j < PARALLEL_BLOCKS; j += 1) { + m = REV128(LOAD128(src + i + j * 16)); + u = XOR256(u, clmul128(m, st->hx[2 * PARALLEL_BLOCKS - 1 - j])); + } + + encrypt_xor_wide(st, dst + i, src + i, rev_counters); + + counter = incr_counters(rev_counters, counter, PARALLEL_BLOCKS); + + for (j = 0; j < PARALLEL_BLOCKS; j += 1) { + m = REV128(LOAD128(src + i + j * 16)); + u = XOR256(u, clmul128(m, st->hx[PARALLEL_BLOCKS - 1 - j])); + } + sth->acc = gcm_reduce(u); + + encrypt_xor_wide(st, dst + i + PARALLEL_BLOCKS * 16, src + i + PARALLEL_BLOCKS * 16, + rev_counters); + } + + /* PARALLEL_BLOCKS aggregation */ + + for (; i + PARALLEL_BLOCKS * 16 <= src_len; i += PARALLEL_BLOCKS * 16) { + counter = incr_counters(rev_counters, counter, PARALLEL_BLOCKS); + + m = REV128(LOAD128(src + i)); + u = clmul128(XOR128(sth->acc, m), st->hx[PARALLEL_BLOCKS - 1 - 0]); + for (j = 1; j < PARALLEL_BLOCKS; j += 1) { + m = REV128(LOAD128(src + i + j * 16)); + u = XOR256(u, clmul128(m, st->hx[PARALLEL_BLOCKS - 1 - j])); + } + sth->acc = gcm_reduce(u); + + encrypt_xor_wide(st, dst + i, src + i, rev_counters); + } + + /* 4-blocks aggregation */ + + for (; i + 4 * 16 <= src_len; i += 4 * 16) { + counter = incr_counters(rev_counters, counter, 4); + + m = REV128(LOAD128(src + i)); + u = clmul128(XOR128(sth->acc, m), st->hx[4 - 1 - 0]); + for (j = 1; j < 4; j += 1) { + m = REV128(LOAD128(src + i + j * 16)); + u = XOR256(u, clmul128(m, st->hx[4 - 1 - j])); + } + sth->acc = gcm_reduce(u); + + for (j = 0; j < 4; j++) { + encrypt_xor_block(st, dst + i + j * 16, src + i + j * 16, rev_counters[j]); + } + } + + /* 2-blocks aggregation */ + + for (; i + 2 * 16 <= src_len; i += 2 * 16) { + counter = incr_counters(rev_counters, counter, 2); + + m = REV128(LOAD128(src + i)); + u = clmul128(XOR128(sth->acc, m), st->hx[2 - 1 - 0]); + for (j = 1; j < 2; j += 1) { + m = REV128(LOAD128(src + i + j * 16)); + u = XOR256(u, clmul128(m, st->hx[2 - 1 - j])); + } + sth->acc = gcm_reduce(u); + + for (j = 0; j < 2; j++) { + encrypt_xor_block(st, dst + i + j * 16, src + i + j * 16, rev_counters[j]); + } + } + + /* Remaining *partial* blocks; if we have 16 bytes left, we want to keep the + full block authenticated along with 
the final block, hence < and not <= */ + + for (; i + 16 < src_len; i += 16) { + m = REV128(LOAD128(src + i)); + u = clmul128(XOR128(sth->acc, m), st->hx[1 - 1 - 0]); + sth->acc = gcm_reduce(u); + encrypt_xor_block(st, dst + i, src + i, REV128(counter)); + counter = ADD64x2(counter, one); + } + + /* Authenticate both the last block of the message and the final block */ + + final_block = REV128(SET64x2(ad_len * 8, src_len * 8)); + STORE32_BE(counter_ + NPUBBYTES, 1); + encrypt(st, mac, counter_); + left = src_len - i; + if (left != 0) { + for (j = 0; j < left; j++) { + last_blocks[j] = src[i + j]; + } + for (; j < 16; j++) { + last_blocks[j] = 0; + } + STORE128(last_blocks + 16, final_block); + gh_ad_blocks(st, sth, last_blocks, 32); + encrypt_xor_block(st, last_blocks, last_blocks, REV128(counter)); + for (j = 0; j < left; j++) { + dst[i + j] = last_blocks[j]; + } + } else { + STORE128(last_blocks, final_block); + gh_ad_blocks(st, sth, last_blocks, 16); + } + STORE128(mac, XOR128(LOAD128(mac), REV128(sth->acc))); +} + +int +crypto_aead_aes256gcm_beforenm(crypto_aead_aes256gcm_state *st_, const unsigned char *k) +{ + State *st = (State *) (void *) st_; + CRYPTO_ALIGN(16) unsigned char h[16]; + + expand256(k, st->rkeys); + memset(h, 0, sizeof h); + encrypt(st, h, h); + + precomp_for_block_count(st->hx, h, PC_COUNT); + + return 0; +} + +int +crypto_aead_aes256gcm_encrypt_detached_afternm(unsigned char *c, unsigned char *mac, + unsigned long long *maclen_p, const unsigned char *m, + unsigned long long m_len, const unsigned char *ad, + unsigned long long ad_len, const unsigned char *nsec, + const unsigned char *npub, + const crypto_aead_aes256gcm_state *st_) +{ + const State *st = (const State *) (const void *) st_; + GHash sth; + CRYPTO_ALIGN(16) unsigned char j[16]; + size_t gh_required_blocks; + + (void) nsec; + if (maclen_p != NULL) { + *maclen_p = 0; + } + gh_required_blocks = required_blocks(ad_len, m_len); + if (gh_required_blocks == 0) { + memset(mac, 0xd0, ABYTES); + memset(c, 0, m_len); + return -1; + } + + gh_init(&sth); + + memcpy(j, npub, NPUBBYTES); + STORE32_BE(j + NPUBBYTES, 2); + + aes_gcm_encrypt_generic(st, &sth, mac, c, m, m_len, ad, ad_len, j); + + if (maclen_p != NULL) { + *maclen_p = ABYTES; + } + return 0; +} + +int +crypto_aead_aes256gcm_encrypt(unsigned char *c, unsigned long long *clen_p, const unsigned char *m, + unsigned long long m_len, const unsigned char *ad, + unsigned long long ad_len, const unsigned char *nsec, + const unsigned char *npub, const unsigned char *k) +{ + const int ret = crypto_aead_aes256gcm_encrypt_detached(c, c + m_len, NULL, m, m_len, ad, ad_len, + nsec, npub, k); + if (clen_p != NULL) { + if (ret == 0) { + *clen_p = m_len + crypto_aead_aes256gcm_ABYTES; + } else { + *clen_p = 0; + } + } + return ret; +} + +int +crypto_aead_aes256gcm_encrypt_detached(unsigned char *c, unsigned char *mac, + unsigned long long *maclen_p, const unsigned char *m, + unsigned long long m_len, const unsigned char *ad, + unsigned long long ad_len, const unsigned char *nsec, + const unsigned char *npub, const unsigned char *k) +{ + CRYPTO_ALIGN(16) crypto_aead_aes256gcm_state st; + int ret; + + PREFETCH_WRITE(c); + PREFETCH_READ(m); + PREFETCH_READ(ad); + + crypto_aead_aes256gcm_beforenm(&st, k); + ret = crypto_aead_aes256gcm_encrypt_detached_afternm(c, mac, maclen_p, m, m_len, ad, ad_len, + nsec, npub, &st); + sodium_memzero(&st, sizeof st); + + return ret; +} + +int +crypto_aead_aes256gcm_encrypt_afternm(unsigned char *c, unsigned long long *clen_p, + const unsigned 
char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const crypto_aead_aes256gcm_state *st_) +{ + int ret = crypto_aead_aes256gcm_encrypt_detached_afternm(c, c + mlen, NULL, m, mlen, ad, adlen, + nsec, npub, st_); + if (clen_p != NULL) { + *clen_p = mlen + crypto_aead_aes256gcm_ABYTES; + } + return ret; +} + +static int +crypto_aead_aes256gcm_verify_mac(unsigned char *nsec, const unsigned char *c, + unsigned long long c_len, const unsigned char *mac, + const unsigned char *ad, unsigned long long ad_len, + const unsigned char *npub, const crypto_aead_aes256gcm_state *st_) +{ + const State *st = (const State *) (const void *) st_; + GHash sth; + BlockVec final_block; + CRYPTO_ALIGN(16) unsigned char j[16]; + CRYPTO_ALIGN(16) unsigned char computed_mac[16]; + CRYPTO_ALIGN(16) unsigned char last_block[16]; + size_t gh_required_blocks; + size_t left; + int ret; + + (void) nsec; + gh_required_blocks = required_blocks(ad_len, c_len); + if (gh_required_blocks == 0) { + return -1; + } + + gh_init(&sth); + + memcpy(j, npub, NPUBBYTES); + STORE32_BE(j + NPUBBYTES, 2); + + gh_ad_blocks(st, &sth, ad, ad_len & ~15); + left = ad_len & 15; + if (left != 0) { + unsigned char pad[16]; + + memset(pad, 0, sizeof pad); + memcpy(pad, ad + ad_len - left, left); + gh_ad_blocks(st, &sth, pad, sizeof pad); + } + + gh_ad_blocks(st, &sth, c, c_len & ~15); + left = c_len & 15; + if (left != 0) { + unsigned char pad[16]; + + memset(pad, 0, sizeof pad); + memcpy(pad, c + c_len - left, left); + gh_ad_blocks(st, &sth, pad, sizeof pad); + } + final_block = REV128(SET64x2(ad_len * 8, c_len * 8)); + STORE32_BE(j + NPUBBYTES, 1); + encrypt(st, computed_mac, j); + STORE128(last_block, final_block); + gh_ad_blocks(st, &sth, last_block, 16); + STORE128(computed_mac, XOR128(LOAD128(computed_mac), REV128(sth.acc))); + + ret = crypto_verify_16(mac, computed_mac); + sodium_memzero(computed_mac, sizeof computed_mac); + + return ret; +} + +int +crypto_aead_aes256gcm_decrypt_detached_afternm(unsigned char *m, unsigned char *nsec, + const unsigned char *c, unsigned long long c_len, + const unsigned char *mac, const unsigned char *ad, + unsigned long long ad_len, const unsigned char *npub, + const crypto_aead_aes256gcm_state *st_) +{ + const State *st = (const State *) (const void *) st_; + GHash sth; + CRYPTO_ALIGN(16) unsigned char j[16]; + unsigned char computed_mac[16]; + size_t gh_required_blocks; + const size_t m_len = c_len; + + if (m == NULL) { + return crypto_aead_aes256gcm_verify_mac(nsec, c, c_len, mac, ad, ad_len, npub, st_); + } + + (void) nsec; + gh_required_blocks = required_blocks(ad_len, m_len); + if (gh_required_blocks == 0) { + return -1; + } + + gh_init(&sth); + + memcpy(j, npub, NPUBBYTES); + STORE32_BE(j + NPUBBYTES, 2); + + aes_gcm_decrypt_generic(st, &sth, computed_mac, m, c, m_len, ad, ad_len, j); + + if (crypto_verify_16(mac, computed_mac) != 0) { + sodium_memzero(computed_mac, sizeof computed_mac); + memset(m, 0xd0, m_len); + return -1; + } + return 0; +} + +int +crypto_aead_aes256gcm_decrypt_afternm(unsigned char *m, unsigned long long *mlen_p, + unsigned char *nsec, const unsigned char *c, + unsigned long long clen, const unsigned char *ad, + unsigned long long adlen, const unsigned char *npub, + const crypto_aead_aes256gcm_state *st_) +{ + unsigned long long mlen = 0ULL; + int ret = -1; + + if (clen >= ABYTES) { + ret = crypto_aead_aes256gcm_decrypt_detached_afternm( + m, nsec, c, clen - ABYTES, c + clen - ABYTES, ad, 
adlen, npub, st_); + } + if (mlen_p != NULL) { + if (ret == 0) { + mlen = clen - ABYTES; + } + *mlen_p = mlen; + } + return ret; +} + +int +crypto_aead_aes256gcm_decrypt_detached(unsigned char *m, unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *mac, const unsigned char *ad, + unsigned long long adlen, const unsigned char *npub, + const unsigned char *k) +{ + CRYPTO_ALIGN(16) crypto_aead_aes256gcm_state st; + + PREFETCH_WRITE(m); + PREFETCH_READ(c); + PREFETCH_READ(ad); + + crypto_aead_aes256gcm_beforenm(&st, k); + + return crypto_aead_aes256gcm_decrypt_detached_afternm( + m, nsec, c, clen, mac, ad, adlen, npub, (const crypto_aead_aes256gcm_state *) &st); +} + +int +crypto_aead_aes256gcm_decrypt(unsigned char *m, unsigned long long *mlen_p, unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k) +{ + CRYPTO_ALIGN(16) crypto_aead_aes256gcm_state st; + int ret; + + PREFETCH_WRITE(m); + PREFETCH_READ(c); + PREFETCH_READ(ad); + + crypto_aead_aes256gcm_beforenm(&st, k); + + ret = crypto_aead_aes256gcm_decrypt_afternm(m, mlen_p, nsec, c, clen, ad, adlen, npub, + (const crypto_aead_aes256gcm_state *) &st); + sodium_memzero(&st, sizeof st); + + return ret; +} + +int +crypto_aead_aes256gcm_is_available(void) +{ + return sodium_runtime_has_armcrypto(); +} + +#endif
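/*
 * The portable stub added in crypto_aead/aes256gcm/aead_aes256gcm.c makes every
 * AES256-GCM entry point fail with ENOSYS, and crypto_aead_aes256gcm_is_available()
 * return 0, whenever neither the AES-NI nor the ARM crypto backend is compiled in.
 * Callers should therefore gate use of this AEAD on the availability check.
 * A minimal caller-side sketch using the public libsodium API (hypothetical buffer
 * names, error handling trimmed; not part of the patch):
 */
#include <stdio.h>
#include <string.h>
#include <sodium.h>

int
main(void)
{
    unsigned char      key[crypto_aead_aes256gcm_KEYBYTES];
    unsigned char      nonce[crypto_aead_aes256gcm_NPUBBYTES];
    unsigned char      msg[] = "test message";
    unsigned char      ct[sizeof msg + crypto_aead_aes256gcm_ABYTES];
    unsigned char      dec[sizeof msg];
    unsigned long long ct_len, dec_len;

    if (sodium_init() < 0) {
        return 1;
    }
    /* With the stub backend, this returns 0 and encrypt/decrypt would fail with ENOSYS. */
    if (crypto_aead_aes256gcm_is_available() == 0) {
        fprintf(stderr, "AES256-GCM is not available on this CPU\n");
        return 1;
    }
    crypto_aead_aes256gcm_keygen(key);
    randombytes_buf(nonce, sizeof nonce);

    /* Combined mode: ciphertext is the message followed by the 16-byte tag. */
    crypto_aead_aes256gcm_encrypt(ct, &ct_len, msg, sizeof msg,
                                  NULL, 0, NULL, nonce, key);
    if (crypto_aead_aes256gcm_decrypt(dec, &dec_len, NULL, ct, ct_len,
                                      NULL, 0, nonce, key) != 0) {
        fprintf(stderr, "tag verification failed\n");
        return 1;
    }
    return memcmp(dec, msg, sizeof msg) != 0;
}

/*
 * For reference, the GF(2^128) field multiplication that the CLMULLO128/CLMULHI128
 * macros together with gcm_reduce() implement via NEON vmull_p64 (modulo the byte
 * reversal done by REV128 and the 0xc2... reduction constant) can be written bit by
 * bit as in NIST SP 800-38D. This portable sketch is for illustration only: it is
 * neither constant-time nor used anywhere by the patch. The GHASH state update for
 * one 16-byte block is then acc = gf128_mul(acc XOR block, H).
 */
static void
gf128_mul(unsigned char r[16], const unsigned char x[16], const unsigned char y[16])
{
    unsigned char z[16] = { 0 };
    unsigned char v[16];
    unsigned char carry;
    int           i, k;

    memcpy(v, y, 16);
    for (i = 0; i < 128; i++) {
        /* Bit i of x, with bit 0 being the most significant bit of x[0]. */
        if ((x[i / 8] >> (7 - (i % 8))) & 1) {
            for (k = 0; k < 16; k++) {
                z[k] ^= v[k];
            }
        }
        carry = (unsigned char) (v[15] & 1); /* bit V_127, shifted out below */
        for (k = 15; k > 0; k--) {
            v[k] = (unsigned char) ((v[k] >> 1) | (v[k - 1] << 7));
        }
        v[0] >>= 1;
        if (carry) {
            v[0] ^= 0xe1; /* reduce modulo x^128 + x^7 + x^2 + x + 1 */
        }
    }
    memcpy(r, z, 16);
}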