1
mirror of https://github.com/jedisct1/libsodium.git synced 2024-12-19 18:15:18 -07:00

Use inline asm if supported

This commit is contained in:
Frank Denis 2020-05-04 17:58:34 +02:00
parent 88c568a035
commit 4967aa8f23
2 changed files with 132 additions and 79 deletions

View File

@ -148,29 +148,35 @@ fe25519_neg(fe25519 h, const fe25519 f)
static void static void
fe25519_cmov(fe25519 f, const fe25519 g, unsigned int b) fe25519_cmov(fe25519 f, const fe25519 g, unsigned int b)
{ {
const uint32_t mask = (uint32_t) (-(int32_t) b); uint32_t mask = (uint32_t) (-(int32_t) b);
int32_t f0, f1, f2, f3, f4, f5, f6, f7, f8, f9;
int32_t x0, x1, x2, x3, x4, x5, x6, x7, x8, x9;
int32_t f0 = f[0]; f0 = f[0];
int32_t f1 = f[1]; f1 = f[1];
int32_t f2 = f[2]; f2 = f[2];
int32_t f3 = f[3]; f3 = f[3];
int32_t f4 = f[4]; f4 = f[4];
int32_t f5 = f[5]; f5 = f[5];
int32_t f6 = f[6]; f6 = f[6];
int32_t f7 = f[7]; f7 = f[7];
int32_t f8 = f[8]; f8 = f[8];
int32_t f9 = f[9]; f9 = f[9];
int32_t x0 = f0 ^ g[0]; x0 = f0 ^ g[0];
int32_t x1 = f1 ^ g[1]; x1 = f1 ^ g[1];
int32_t x2 = f2 ^ g[2]; x2 = f2 ^ g[2];
int32_t x3 = f3 ^ g[3]; x3 = f3 ^ g[3];
int32_t x4 = f4 ^ g[4]; x4 = f4 ^ g[4];
int32_t x5 = f5 ^ g[5]; x5 = f5 ^ g[5];
int32_t x6 = f6 ^ g[6]; x6 = f6 ^ g[6];
int32_t x7 = f7 ^ g[7]; x7 = f7 ^ g[7];
int32_t x8 = f8 ^ g[8]; x8 = f8 ^ g[8];
int32_t x9 = f9 ^ g[9]; x9 = f9 ^ g[9];
#ifdef HAVE_INLINE_ASM
__asm__ __volatile__("" : "+r"(mask));
#endif
x0 &= mask; x0 &= mask;
x1 &= mask; x1 &= mask;
@ -198,40 +204,47 @@ fe25519_cmov(fe25519 f, const fe25519 g, unsigned int b)
static void static void
fe25519_cswap(fe25519 f, fe25519 g, unsigned int b) fe25519_cswap(fe25519 f, fe25519 g, unsigned int b)
{ {
const uint32_t mask = (uint32_t) (-(int64_t) b); uint32_t mask = (uint32_t) (-(int64_t) b);
int32_t f0, f1, f2, f3, f4, f5, f6, f7, f8, f9;
int32_t g0, g1, g2, g3, g4, g5, g6, g7, g8, g9;
int32_t x0, x1, x2, x3, x4, x5, x6, x7, x8, x9;
int32_t f0 = f[0]; f0 = f[0];
int32_t f1 = f[1]; f1 = f[1];
int32_t f2 = f[2]; f2 = f[2];
int32_t f3 = f[3]; f3 = f[3];
int32_t f4 = f[4]; f4 = f[4];
int32_t f5 = f[5]; f5 = f[5];
int32_t f6 = f[6]; f6 = f[6];
int32_t f7 = f[7]; f7 = f[7];
int32_t f8 = f[8]; f8 = f[8];
int32_t f9 = f[9]; f9 = f[9];
int32_t g0 = g[0]; g0 = g[0];
int32_t g1 = g[1]; g1 = g[1];
int32_t g2 = g[2]; g2 = g[2];
int32_t g3 = g[3]; g3 = g[3];
int32_t g4 = g[4]; g4 = g[4];
int32_t g5 = g[5]; g5 = g[5];
int32_t g6 = g[6]; g6 = g[6];
int32_t g7 = g[7]; g7 = g[7];
int32_t g8 = g[8]; g8 = g[8];
int32_t g9 = g[9]; g9 = g[9];
int32_t x0 = f0 ^ g0; x0 = f0 ^ g0;
int32_t x1 = f1 ^ g1; x1 = f1 ^ g1;
int32_t x2 = f2 ^ g2; x2 = f2 ^ g2;
int32_t x3 = f3 ^ g3; x3 = f3 ^ g3;
int32_t x4 = f4 ^ g4; x4 = f4 ^ g4;
int32_t x5 = f5 ^ g5; x5 = f5 ^ g5;
int32_t x6 = f6 ^ g6; x6 = f6 ^ g6;
int32_t x7 = f7 ^ g7; x7 = f7 ^ g7;
int32_t x8 = f8 ^ g8; x8 = f8 ^ g8;
int32_t x9 = f9 ^ g9; x9 = f9 ^ g9;
#ifdef HAVE_INLINE_ASM
__asm__ __volatile__("" : "+r"(mask));
#endif
x0 &= mask; x0 &= mask;
x1 &= mask; x1 &= mask;

View File

@ -109,19 +109,51 @@ fe25519_neg(fe25519 h, const fe25519 f)
static void static void
fe25519_cmov(fe25519 f, const fe25519 g, unsigned int b) fe25519_cmov(fe25519 f, const fe25519 g, unsigned int b)
{ {
const uint64_t mask = (uint64_t) (-(int64_t) b); #ifdef HAVE_AMD64_ASM
uint64_t t0, t1, t2;
uint64_t f0 = f[0]; __asm__ __volatile__
uint64_t f1 = f[1]; (
uint64_t f2 = f[2]; "test %[c], %[c]\n"
uint64_t f3 = f[3]; "movq (%[b]), %[t0]\n"
uint64_t f4 = f[4]; "cmoveq (%[a]), %[t0]\n"
"movq 8(%[b]), %[t1]\n"
"cmoveq 8(%[a]), %[t1]\n"
"movq 16(%[b]), %[t2]\n"
"cmoveq 16(%[a]), %[t2]\n"
"movq %[t0], (%[a])\n"
"movq %[t1], 8(%[a])\n"
"movq 24(%[b]), %[t0]\n"
"cmoveq 24(%[a]), %[t0]\n"
"movq 32(%[b]), %[t1]\n"
"cmoveq 32(%[a]), %[t1]\n"
"movq %[t2], 16(%[a])\n"
"movq %[t0], 24(%[a])\n"
"movq %[t1], 32(%[a])\n"
: [ t0 ] "=&r"(t0), [ t1 ] "=&r"(t1), [ t2 ] "=&r"(t2)
: [ a ] "r"(*(unsigned char(*)[40]) f),
[ b ] "r"(*(const unsigned char(*)[40]) g), [ c ] "r"(b)
: "cc");
#else
uint64_t mask = (uint64_t) (-(int64_t) b);
uint64_t f0, f1, f2, f3, f4;
uint64_t x0, x1, x2, x3, x4;
uint64_t x0 = f0 ^ g[0]; f0 = f[0];
uint64_t x1 = f1 ^ g[1]; f1 = f[1];
uint64_t x2 = f2 ^ g[2]; f2 = f[2];
uint64_t x3 = f3 ^ g[3]; f3 = f[3];
uint64_t x4 = f4 ^ g[4]; f4 = f[4];
x0 = f0 ^ g[0];
x1 = f1 ^ g[1];
x2 = f2 ^ g[2];
x3 = f3 ^ g[3];
x4 = f4 ^ g[4];
# ifdef HAVE_INLINE_ASM
__asm__ __volatile__("" : "+r"(mask));
# endif
x0 &= mask; x0 &= mask;
x1 &= mask; x1 &= mask;
@ -134,6 +166,7 @@ fe25519_cmov(fe25519 f, const fe25519 g, unsigned int b)
f[2] = f2 ^ x2; f[2] = f2 ^ x2;
f[3] = f3 ^ x3; f[3] = f3 ^ x3;
f[4] = f4 ^ x4; f[4] = f4 ^ x4;
#endif
} }
/* /*
@ -146,25 +179,32 @@ Preconditions: b in {0,1}.
static void static void
fe25519_cswap(fe25519 f, fe25519 g, unsigned int b) fe25519_cswap(fe25519 f, fe25519 g, unsigned int b)
{ {
const uint64_t mask = (uint64_t) (-(int64_t) b); uint64_t mask = (uint64_t) (-(int64_t) b);
uint64_t f0, f1, f2, f3, f4;
uint64_t g0, g1, g2, g3, g4;
uint64_t x0, x1, x2, x3, x4;
uint64_t f0 = f[0]; f0 = f[0];
uint64_t f1 = f[1]; f1 = f[1];
uint64_t f2 = f[2]; f2 = f[2];
uint64_t f3 = f[3]; f3 = f[3];
uint64_t f4 = f[4]; f4 = f[4];
uint64_t g0 = g[0]; g0 = g[0];
uint64_t g1 = g[1]; g1 = g[1];
uint64_t g2 = g[2]; g2 = g[2];
uint64_t g3 = g[3]; g3 = g[3];
uint64_t g4 = g[4]; g4 = g[4];
uint64_t x0 = f0 ^ g0; x0 = f0 ^ g0;
uint64_t x1 = f1 ^ g1; x1 = f1 ^ g1;
uint64_t x2 = f2 ^ g2; x2 = f2 ^ g2;
uint64_t x3 = f3 ^ g3; x3 = f3 ^ g3;
uint64_t x4 = f4 ^ g4; x4 = f4 ^ g4;
# ifdef HAVE_INLINE_ASM
__asm__ __volatile__("" : "+r"(mask));
# endif
x0 &= mask; x0 &= mask;
x1 &= mask; x1 &= mask;