1
mirror of https://github.com/jedisct1/libsodium.git synced 2024-12-28 22:21:15 -07:00

Make the poly1305_sse2 code more consistent with the other implementation

This commit is contained in:
Frank Denis 2015-11-14 13:56:57 +01:00
parent 6b7811471b
commit a964055487
2 changed files with 131 additions and 46 deletions

View File

@ -1,8 +1,35 @@
#include <stdint.h> #include <stdint.h>
#include <x86intrin.h> #include <string.h>
#include "utils.h"
#include "poly1305_sse2.h"
#include "../onetimeauth_poly1305.h"
#if defined(HAVE_TI_MODE) && defined(HAVE_AMD64_ASM) && defined(HAVE_EMMINTRIN_H)
#pragma GCC target("sse2")
#include <emmintrin.h>
#undef force_inline
#define force_inline __attribute__((always_inline))
typedef __m128i xmmi; typedef __m128i xmmi;
typedef unsigned int uint128_t __attribute__((mode(TI)));
#if defined(__SIZEOF_INT128__)
typedef unsigned __int128 uint128_t;
#else
typedef unsigned uint128_t __attribute__((mode(TI)));
#endif
#if defined(_MSC_VER)
# define POLY1305_NOINLINE __declspec(noinline)
#elif defined(__GNUC__)
# define POLY1305_NOINLINE __attribute__((noinline))
#else
# define POLY1305_NOINLINE
#endif
enum poly1305_state_flags_t { enum poly1305_state_flags_t {
poly1305_started = 1, poly1305_started = 1,
@ -22,43 +49,33 @@ typedef struct poly1305_state_internal_t {
uint32_t R4[5]; /* 20 bytes */ uint32_t R4[5]; /* 20 bytes */
uint64_t pad[2]; /* 16 bytes */ uint64_t pad[2]; /* 16 bytes */
uint64_t flags; /* 8 bytes */ uint64_t flags; /* 8 bytes */
} poly1305_state_internal; /* 124 bytes total */ } poly1305_state_internal_t; /* 124 bytes total */
typedef uint8_t poly1305_state[128];
#if defined(__AVX__)
#define FN(name) name##_avx
#else
#define FN(name) name##_sse2
#endif
size_t
FN(poly1305_block_size)(void) {
return 32;
}
/* copy 0-31 bytes */ /* copy 0-31 bytes */
/*
 * Copy (bytes & 31) bytes from src to dst.
 *
 * The length is consumed one bit at a time (16, 8, 4, 2, 1), so every
 * memcpy below has a compile-time constant size. Bits of `bytes` above
 * bit 4 are ignored.
 *
 * memcpy replaces the former loads/stores through cast pointers. Those
 * required dst to be 16-byte aligned for the 16-byte step, accessed
 * unsigned char storage through wider integer lvalues, cast away the
 * const qualifier of src, and derived the source address from a pointer
 * difference between two unrelated objects.
 *
 * dst and src must not overlap.
 */
static inline __attribute__((always_inline)) void
poly1305_block_copy31(unsigned char *dst, const unsigned char *src,
                      unsigned long long bytes)
{
    if (bytes & 16) {
        memcpy(dst, src, 16);
        dst += 16;
        src += 16;
    }
    if (bytes & 8) {
        memcpy(dst, src, 8);
        dst += 8;
        src += 8;
    }
    if (bytes & 4) {
        memcpy(dst, src, 4);
        dst += 4;
        src += 4;
    }
    if (bytes & 2) {
        memcpy(dst, src, 2);
        dst += 2;
        src += 2;
    }
    if (bytes & 1) {
        *dst = *src;
    }
}
__attribute__((noinline)) void static POLY1305_NOINLINE void
FN(poly1305_init_ext)(poly1305_state_internal *st, const unsigned char key[32], size_t bytes) { poly1305_init_ext(poly1305_state_internal_t *st,
const unsigned char key[32], unsigned long long bytes)
{
uint32_t *R; uint32_t *R;
uint128_t d[3],m0; uint128_t d[3],m0;
uint64_t r0,r1,r2; uint64_t r0,r1,r2;
uint32_t rp0,rp1,rp2,rp3,rp4; uint32_t rp0,rp1,rp2,rp3,rp4;
uint64_t rt0,rt1,rt2,st2,c; uint64_t rt0,rt1,rt2,st2,c;
uint64_t t0,t1; uint64_t t0,t1;
size_t i; unsigned long long i;
if (!bytes) bytes = ~(size_t)0; if (!bytes) bytes = ~(unsigned long long)0;
/* H = 0 */ /* H = 0 */
_mm_storeu_si128((xmmi *)&st->hh[0], _mm_setzero_si128()); _mm_storeu_si128((xmmi *)&st->hh[0], _mm_setzero_si128());
@ -121,9 +138,11 @@ FN(poly1305_init_ext)(poly1305_state_internal *st, const unsigned char key[32],
st->flags = 0; st->flags = 0;
} }
__attribute__((noinline)) void static POLY1305_NOINLINE void
FN(poly1305_blocks)(poly1305_state_internal *st, const uint8_t *m, size_t bytes) { poly1305_blocks(poly1305_state_internal_t *st, const unsigned char *m,
__attribute__((aligned(64))) xmmi HIBIT = _mm_shuffle_epi32(_mm_cvtsi32_si128(1 << 24), _MM_SHUFFLE(1,0,1,0)); unsigned long long bytes)
{
CRYPTO_ALIGN(64) xmmi HIBIT = _mm_shuffle_epi32(_mm_cvtsi32_si128(1 << 24), _MM_SHUFFLE(1,0,1,0));
const xmmi MMASK = _mm_shuffle_epi32(_mm_cvtsi32_si128((1 << 26) - 1), _MM_SHUFFLE(1,0,1,0)); const xmmi MMASK = _mm_shuffle_epi32(_mm_cvtsi32_si128((1 << 26) - 1), _MM_SHUFFLE(1,0,1,0));
const xmmi FIVE = _mm_shuffle_epi32(_mm_cvtsi32_si128(5), _MM_SHUFFLE(1,0,1,0)); const xmmi FIVE = _mm_shuffle_epi32(_mm_cvtsi32_si128(5), _MM_SHUFFLE(1,0,1,0));
@ -341,7 +360,6 @@ FN(poly1305_blocks)(poly1305_state_internal *st, const uint8_t *m, size_t bytes)
} }
} }
if (bytes >= 32) { if (bytes >= 32) {
xmmi v01,v02,v03,v04; xmmi v01,v02,v03,v04;
xmmi v11,v12,v13,v14; xmmi v11,v12,v13,v14;
@ -483,17 +501,19 @@ FN(poly1305_blocks)(poly1305_state_internal *st, const uint8_t *m, size_t bytes)
} }
} }
__attribute__((noinline)) void static POLY1305_NOINLINE void
FN(poly1305_finish_ext)(poly1305_state_internal *st, const uint8_t *m, size_t leftover, unsigned char mac[16]) { poly1305_finish_ext(poly1305_state_internal_t *st, const unsigned char *m,
unsigned long long leftover, unsigned char mac[16])
{
uint64_t h0,h1,h2; uint64_t h0,h1,h2;
uint64_t t0,t1,c; uint64_t t0,t1,c;
if (leftover) { if (leftover) {
__attribute__((aligned(16))) unsigned char final[32] = {0}; CRYPTO_ALIGN(16) unsigned char final[32] = {0};
poly1305_block_copy31(final, m, leftover); poly1305_block_copy31(final, m, leftover);
if (leftover != 16) final[leftover] = 1; if (leftover != 16) final[leftover] = 1;
st->flags |= (leftover >= 16) ? poly1305_final_shift8 : poly1305_final_shift16; st->flags |= (leftover >= 16) ? poly1305_final_shift8 : poly1305_final_shift16;
FN(poly1305_blocks)(st, final, 32); poly1305_blocks(st, final, 32);
} }
if (st->flags & poly1305_started) { if (st->flags & poly1305_started) {
@ -502,7 +522,7 @@ FN(poly1305_finish_ext)(poly1305_state_internal *st, const uint8_t *m, size_t le
st->flags |= poly1305_final_r2_r; st->flags |= poly1305_final_r2_r;
else else
st->flags |= poly1305_final_r_1; st->flags |= poly1305_final_r_1;
FN(poly1305_blocks)(st, NULL, 32); poly1305_blocks(st, NULL, 32);
} }
h0 = st->h[0]; h0 = st->h[0];
@ -534,19 +554,53 @@ FN(poly1305_finish_ext)(poly1305_state_internal *st, const uint8_t *m, size_t le
*(uint64_t *)(mac + 8) = h1; *(uint64_t *)(mac + 8) = h1;
} }
static int
crypto_onetimeauth_poly1305_sse2_init(crypto_onetimeauth_poly1305_state *state,
const unsigned char *key)
{
poly1305_init_ext((poly1305_state_internal_t *)(void *) state, key, 0U);
void return 0;
FN(poly1305_auth)(unsigned char out[16], const unsigned char *m, size_t inlen, const unsigned char key[32]) {
__attribute__((aligned(64))) poly1305_state S;
poly1305_state_internal *st = (poly1305_state_internal *)S;
size_t blocks;
FN(poly1305_init_ext)(st, key, inlen);
blocks = inlen & ~31;
if (blocks) {
FN(poly1305_blocks)(st, m, blocks);
m += blocks;
inlen -= blocks;
}
FN(poly1305_finish_ext)(st, m, inlen, out);
} }
/*
 * Streaming update: hand `inlen` bytes at `in` to poly1305_blocks(),
 * operating on `state` reinterpreted as the internal state layout.
 *
 * The input is forwarded as-is; nothing is buffered in this function,
 * which is what the #warning below flags as still to be handled.
 * Always returns 0.
 */
static int
crypto_onetimeauth_poly1305_sse2_update(crypto_onetimeauth_poly1305_state *state,
                                        const unsigned char *in,
                                        unsigned long long inlen)
{
    poly1305_state_internal_t *st =
        (poly1305_state_internal_t *)(void *) state;

#warning Handle partial blocks
    poly1305_blocks(st, in, inlen);

    return 0;
}
/*
 * Streaming finalization: run poly1305_finish_ext() on `state`
 * (reinterpreted as the internal state layout) with no trailing input
 * (NULL message pointer, zero leftover length), writing the result to
 * `out`. Always returns 0.
 */
static int
crypto_onetimeauth_poly1305_sse2_final(crypto_onetimeauth_poly1305_state *state,
                                       unsigned char *out)
{
    poly1305_state_internal_t *st =
        (poly1305_state_internal_t *)(void *) state;

    poly1305_finish_ext(st, NULL, 0ULL, out);

    return 0;
}
/*
 * One-shot entry point: authenticate the `inlen` bytes at `m` under
 * `key` and write the result to `out`.
 *
 * A 64-byte-aligned state on the stack is initialized with the total
 * message length, the largest prefix whose length is a multiple of 32
 * is passed to poly1305_blocks(), and the remaining 0-31 bytes go to
 * poly1305_finish_ext(). Always returns 0.
 */
static int
crypto_onetimeauth_poly1305_sse2(unsigned char *out, const unsigned char *m,
                                 unsigned long long inlen,
                                 const unsigned char *key)
{
    CRYPTO_ALIGN(64) poly1305_state_internal_t st;
    unsigned long long                         bulk_len;

    poly1305_init_ext(&st, key, inlen);

    /* Round the length down to a multiple of 32. */
    bulk_len = inlen & ~(unsigned long long) 31;
    if (bulk_len != 0ULL) {
        poly1305_blocks(&st, m, bulk_len);
        m     += bulk_len;
        inlen -= bulk_len;
    }

    /* inlen is now the 0-31 byte tail. */
    poly1305_finish_ext(&st, m, inlen, out);

    return 0;
}
#endif

View File

@ -0,0 +1,31 @@
/*
 * Declarations for the SSE2 Poly1305 one-time-authenticator backend.
 */
#ifndef poly1305_sse2_H
#define poly1305_sse2_H
#include <stddef.h>
#include "crypto_onetimeauth_poly1305.h"
/*
 * Implementation descriptor object for this backend; defined elsewhere
 * (extern). The struct type comes from an included header and its
 * members are not visible here.
 */
extern struct crypto_onetimeauth_poly1305_implementation
crypto_onetimeauth_poly1305_sse2_implementation;
/*
 * NOTE(review): every prototype below is `static`, i.e. has internal
 * linkage. Each translation unit that includes this header gets its own
 * declaration and must provide its own definition for any of these
 * functions it actually uses.
 */
/* One-shot: authenticate `inlen` bytes at `in` under key `k`, result in `out`. */
static int crypto_onetimeauth_poly1305_sse2(unsigned char *out,
const unsigned char *in,
unsigned long long inlen,
const unsigned char *k);
/*
 * Presumably checks that `h` is the authenticator of `in` under `k`.
 * NOTE(review): confirm a matching definition exists before this is
 * referenced.
 */
static int crypto_onetimeauth_poly1305_sse2_verify(const unsigned char *h,
const unsigned char *in,
unsigned long long inlen,
const unsigned char *k);
/* Streaming interface: set up `state` from `key`. */
static int crypto_onetimeauth_poly1305_sse2_init(crypto_onetimeauth_poly1305_state *state,
const unsigned char *key);
/* Streaming interface: feed `inlen` bytes at `in` into `state`. */
static int crypto_onetimeauth_poly1305_sse2_update(crypto_onetimeauth_poly1305_state *state,
const unsigned char *in,
unsigned long long inlen);
/* Streaming interface: finish the computation and write the result to `out`. */
static int crypto_onetimeauth_poly1305_sse2_final(crypto_onetimeauth_poly1305_state *state,
unsigned char *out);
#endif /* poly1305_sse2_H */