From 48af322b7a0b9b0c4317cc096220544f51d5ac18 Mon Sep 17 00:00:00 2001
From: Frank Denis
Date: Sat, 14 Jan 2023 00:01:42 +0100
Subject: [PATCH] AES-256-GCM (AES-NI): prefetch the next blocks

...while computing the GHASH of the previous blocks.

For AMD CPUs with disabled hardware prefetchers, the gain may be significant.
---
 .../crypto_aead/aes256gcm/aesni/aead_aes256gcm_aesni.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/src/libsodium/crypto_aead/aes256gcm/aesni/aead_aes256gcm_aesni.c b/src/libsodium/crypto_aead/aes256gcm/aesni/aead_aes256gcm_aesni.c
index c0b747e9..75df53d0 100644
--- a/src/libsodium/crypto_aead/aes256gcm/aesni/aead_aes256gcm_aesni.c
+++ b/src/libsodium/crypto_aead/aes256gcm/aesni/aead_aes256gcm_aesni.c
@@ -444,6 +444,11 @@ aes_gcm_encrypt_generic(const State *st, GHash *sth, unsigned char mac[ABYTES],
         counter = incr_counters(rev_counters, counter, PARALLEL_BLOCKS);
         encrypt_xor_wide(st, dst + i, src + i, rev_counters);
 
+        PREFETCH_READ(src + i + PARALLEL_BLOCKS * 16);
+#if PARALLEL_BLOCKS >= 64 / 16
+        PREFETCH_READ(src + i + PARALLEL_BLOCKS * 16 + 64);
+#endif
+
         pi = i - PARALLEL_BLOCKS * 16;
         u = gh_update0(sth, dst + pi, st->hx[2 * PARALLEL_BLOCKS - 1 - 0]);
         for (j = 1; j < PARALLEL_BLOCKS; j += 1) {
@@ -454,6 +459,10 @@ aes_gcm_encrypt_generic(const State *st, GHash *sth, unsigned char mac[ABYTES],
         encrypt_xor_wide(st, dst + i + PARALLEL_BLOCKS * 16, src + i + PARALLEL_BLOCKS * 16,
                          rev_counters);
 
+        PREFETCH_READ(src + i + 2 * PARALLEL_BLOCKS * 16);
+#if PARALLEL_BLOCKS >= 64 / 16
+        PREFETCH_READ(src + i + 2 * PARALLEL_BLOCKS * 16 + 64);
+#endif
         pi = i;
         for (j = 0; j < PARALLEL_BLOCKS; j += 1) {
             gh_update(&u, dst + pi + j * 16, st->hx[PARALLEL_BLOCKS - 1 - j]);