mirror of
https://github.com/jedisct1/libsodium.git
synced 2024-12-20 10:37:24 -07:00
Import the raw Sandy2x curve25519 implementation
This commit is contained in:
parent
9623e58c03
commit
7e00ec1a9b
44
src/libsodium/crypto_scalarmult/curve25519/sandy2x/base.c
Normal file
44
src/libsodium/crypto_scalarmult/curve25519/sandy2x/base.c
Normal file
@ -0,0 +1,44 @@
|
||||
#include "crypto_scalarmult.h"
|
||||
|
||||
#include "fe.h"
|
||||
#include "fe51.h"
|
||||
#include "ladder_base.h"
|
||||
|
||||
#define x2 var[0]
|
||||
#define z2 var[1]
|
||||
|
||||
int crypto_scalarmult_base(unsigned char *q,const unsigned char *n)
|
||||
{
|
||||
unsigned char e[32];
|
||||
unsigned int i;
|
||||
|
||||
fe var[3];
|
||||
|
||||
fe51 x_51;
|
||||
fe51 z_51;
|
||||
|
||||
for (i = 0;i < 32;++i) e[i] = n[i];
|
||||
e[0] &= 248;
|
||||
e[31] &= 127;
|
||||
e[31] |= 64;
|
||||
|
||||
ladder_base(var, e);
|
||||
|
||||
z_51.v[0] = (z2[1] << 26) + z2[0];
|
||||
z_51.v[1] = (z2[3] << 26) + z2[2];
|
||||
z_51.v[2] = (z2[5] << 26) + z2[4];
|
||||
z_51.v[3] = (z2[7] << 26) + z2[6];
|
||||
z_51.v[4] = (z2[9] << 26) + z2[8];
|
||||
|
||||
x_51.v[0] = (x2[1] << 26) + x2[0];
|
||||
x_51.v[1] = (x2[3] << 26) + x2[2];
|
||||
x_51.v[2] = (x2[5] << 26) + x2[4];
|
||||
x_51.v[3] = (x2[7] << 26) + x2[6];
|
||||
x_51.v[4] = (x2[9] << 26) + x2[8];
|
||||
|
||||
fe51_invert(&z_51, &z_51);
|
||||
fe51_mul(&x_51, &x_51, &z_51);
|
||||
fe51_pack(q, &x_51);
|
||||
|
||||
return 0;
|
||||
}
|
38
src/libsodium/crypto_scalarmult/curve25519/sandy2x/consts.S
Normal file
38
src/libsodium/crypto_scalarmult/curve25519/sandy2x/consts.S
Normal file
@ -0,0 +1,38 @@
|
||||
/*
|
||||
REDMASK51 is from amd64-51/consts.s.
|
||||
*/
|
||||
#include "consts_namespace.h"
|
||||
/*
 * Constant table for the sandy2x code.
 *
 * Every entry except REDMASK51 is a pair of 64-bit quads (16 bytes); the
 * table starts on a 16-byte boundary, so each pair is 16-byte aligned.
 * A symbol named vA_B holds the pair { A, B }.
 *
 * NOTE(review): the symbols are global with default visibility; whether
 * they should be hidden when building a shared object is not decided here.
 */
.data
.p2align 4

.globl v0_0
v0_0: .quad 0, 0

.globl v1_0
v1_0: .quad 1, 0

.globl v2_1
v2_1: .quad 2, 1

.globl v2_2
v2_2: .quad 2, 2

.globl v9_0
v9_0: .quad 9, 0

.globl v9_9
v9_9: .quad 9, 9

.globl v19_19
v19_19: .quad 19, 19

.globl v38_1
v38_1: .quad 38, 1

.globl v38_19
v38_19: .quad 38, 19

.globl v38_38
v38_38: .quad 38, 38

.globl v121666_121666
v121666_121666: .quad 121666, 121666

/* 2^25 - 1 in both lanes */
.globl m25
m25: .quad 0x1FFFFFF, 0x1FFFFFF

/* 2^26 - 1 in both lanes */
.globl m26
m26: .quad 0x3FFFFFF, 0x3FFFFFF

.globl subc0
subc0: .quad 0x07FFFFDA, 0x03FFFFFE

.globl subc2
subc2: .quad 0x07FFFFFE, 0x03FFFFFE

/* 2^51 - 1: limb mask loaded by fe51_mul, fe51_nsquare and fe51_pack */
.globl REDMASK51
REDMASK51: .quad 0x0007FFFFFFFFFFFF
|
@ -0,0 +1,22 @@
|
||||
#ifndef CONSTS_NAMESPACE_H
|
||||
#define CONSTS_NAMESPACE_H
|
||||
|
||||
#define v0_0 crypto_scalarmult_curve25519_sandy2x_v0_0
|
||||
#define v1_0 crypto_scalarmult_curve25519_sandy2x_v1_0
|
||||
#define v2_1 crypto_scalarmult_curve25519_sandy2x_v2_1
|
||||
#define v2_2 crypto_scalarmult_curve25519_sandy2x_v2_2
|
||||
#define v9_0 crypto_scalarmult_curve25519_sandy2x_v9_0
|
||||
#define v9_9 crypto_scalarmult_curve25519_sandy2x_v9_9
|
||||
#define v19_19 crypto_scalarmult_curve25519_sandy2x_v19_19
|
||||
#define v38_1 crypto_scalarmult_curve25519_sandy2x_v38_1
|
||||
#define v38_19 crypto_scalarmult_curve25519_sandy2x_v38_19
|
||||
#define v38_38 crypto_scalarmult_curve25519_sandy2x_v38_38
|
||||
#define v121666_121666 crypto_scalarmult_curve25519_sandy2x_v121666_121666
|
||||
#define m25 crypto_scalarmult_curve25519_sandy2x_m25
|
||||
#define m26 crypto_scalarmult_curve25519_sandy2x_m26
|
||||
#define subc0 crypto_scalarmult_curve25519_sandy2x_subc0
|
||||
#define subc2 crypto_scalarmult_curve25519_sandy2x_subc2
|
||||
#define REDMASK51 crypto_scalarmult_curve25519_sandy2x_REDMASK51
|
||||
|
||||
#endif //ifndef CONSTS_NAMESPACE_H
|
||||
|
25
src/libsodium/crypto_scalarmult/curve25519/sandy2x/fe.h
Normal file
25
src/libsodium/crypto_scalarmult/curve25519/sandy2x/fe.h
Normal file
@ -0,0 +1,25 @@
|
||||
/*
|
||||
This file is adapted from ref10/fe.h:
|
||||
All the redundant functions are removed.
|
||||
*/
|
||||
|
||||
#ifndef FE_H
|
||||
#define FE_H
|
||||
|
||||
#include "crypto_uint64.h"
|
||||
|
||||
typedef crypto_uint64 fe[10];
|
||||
|
||||
/*
|
||||
fe means field element.
|
||||
Here the field is \Z/(2^255-19).
|
||||
An element t, entries t[0]...t[9], represents the integer
|
||||
t[0]+2^26 t[1]+2^51 t[2]+2^77 t[3]+2^102 t[4]+...+2^230 t[9].
|
||||
Bounds on each t[i] vary depending on context.
|
||||
*/
|
||||
|
||||
#define fe_frombytes crypto_scalarmult_curve25519_sandy2x_fe_frombytes
|
||||
|
||||
extern void fe_frombytes(fe, const unsigned char *);
|
||||
|
||||
#endif
|
25
src/libsodium/crypto_scalarmult/curve25519/sandy2x/fe51.h
Normal file
25
src/libsodium/crypto_scalarmult/curve25519/sandy2x/fe51.h
Normal file
@ -0,0 +1,25 @@
|
||||
/*
|
||||
This file is adapted from amd64-51/fe25519.h:
|
||||
'fe25519' is renamed as 'fe51';
|
||||
All the redundant functions are removed;
|
||||
New function fe51_nsquare is introduced.
|
||||
*/
|
||||
|
||||
#ifndef FE51_H
|
||||
#define FE51_H
|
||||
|
||||
#include "crypto_uint64.h"
|
||||
#include "fe51_namespace.h"
|
||||
|
||||
typedef struct
|
||||
{
|
||||
crypto_uint64 v[5];
|
||||
}
|
||||
fe51;
|
||||
|
||||
extern void fe51_pack(unsigned char *, const fe51 *);
|
||||
extern void fe51_mul(fe51 *, const fe51 *, const fe51 *);
|
||||
extern void fe51_nsquare(fe51 *, const fe51 *, int);
|
||||
extern void fe51_invert(fe51 *, const fe51 *);
|
||||
|
||||
#endif
|
@ -0,0 +1,53 @@
|
||||
/*
|
||||
This file is adapted from amd64-51/fe25519_invert.c:
|
||||
Loops of squares are replaced by nsquares for better performance.
|
||||
*/
|
||||
|
||||
#include "fe51.h"
|
||||
|
||||
#define fe51_square(x, y) fe51_nsquare(x, y, 1)
|
||||
|
||||
/*
 * r = x^(2^255 - 21), computed with a fixed chain of squarings and
 * multiplications.  The comment on each step gives the exponent of x
 * held in the destination after that step.  r may alias x: x is last
 * read well before r is written.
 */
void fe51_invert(fe51 *r, const fe51 *x)
{
  fe51 p2;        /* x^2           */
  fe51 p9;        /* x^9           */
  fe51 p11;       /* x^11          */
  fe51 e5;        /* x^(2^5  - 1)  */
  fe51 e10;       /* x^(2^10 - 1)  */
  fe51 e20;       /* x^(2^20 - 1)  */
  fe51 e50;       /* x^(2^50 - 1)  */
  fe51 e100;      /* x^(2^100 - 1) */
  fe51 acc;       /* running value */

  fe51_nsquare(&p2, x, 1);            /* 2              */
  fe51_nsquare(&acc, &p2, 1);         /* 4              */
  fe51_nsquare(&acc, &acc, 1);        /* 8              */
  fe51_mul(&p9, &acc, x);             /* 9              */
  fe51_mul(&p11, &p9, &p2);           /* 11             */
  fe51_nsquare(&acc, &p11, 1);        /* 22             */
  fe51_mul(&e5, &acc, &p9);           /* 2^5 - 2^0 = 31 */

  fe51_nsquare(&acc, &e5, 5);         /* 2^10 - 2^5     */
  fe51_mul(&e10, &acc, &e5);          /* 2^10 - 2^0     */

  fe51_nsquare(&acc, &e10, 10);       /* 2^20 - 2^10    */
  fe51_mul(&e20, &acc, &e10);         /* 2^20 - 2^0     */

  fe51_nsquare(&acc, &e20, 20);       /* 2^40 - 2^20    */
  fe51_mul(&acc, &acc, &e20);         /* 2^40 - 2^0     */

  fe51_nsquare(&acc, &acc, 10);       /* 2^50 - 2^10    */
  fe51_mul(&e50, &acc, &e10);         /* 2^50 - 2^0     */

  fe51_nsquare(&acc, &e50, 50);       /* 2^100 - 2^50   */
  fe51_mul(&e100, &acc, &e50);        /* 2^100 - 2^0    */

  fe51_nsquare(&acc, &e100, 100);     /* 2^200 - 2^100  */
  fe51_mul(&acc, &acc, &e100);        /* 2^200 - 2^0    */

  fe51_nsquare(&acc, &acc, 50);       /* 2^250 - 2^50   */
  fe51_mul(&acc, &acc, &e50);         /* 2^250 - 2^0    */

  fe51_nsquare(&acc, &acc, 5);        /* 2^255 - 2^5    */
  fe51_mul(r, &acc, &p11);            /* 2^255 - 21     */
}
|
185
src/libsodium/crypto_scalarmult/curve25519/sandy2x/fe51_mul.S
Normal file
185
src/libsodium/crypto_scalarmult/curve25519/sandy2x/fe51_mul.S
Normal file
@ -0,0 +1,185 @@
|
||||
/*
|
||||
This file is basically amd64-51/fe25519_mul.s.
|
||||
*/
|
||||
#include "fe51_namespace.h"
|
||||
#include "consts_namespace.h"
|
||||
/*
 * void fe51_mul(fe51 *r, const fe51 *f, const fe51 *g)
 *
 *   rdi = r, rsi = f, rdx = g (moved to rcx because mul clobbers rdx).
 *
 * Computes the five 128-bit column sums
 *   h0 = f0*g0 + 19*(f1*g4 + f2*g3 + f3*g2 + f4*g1)   -> r9:r8
 *   h1 = f0*g1 + f1*g0 + 19*(f2*g4 + f3*g3 + f4*g2)   -> r11:r10
 *   h2 = f0*g2 + f1*g1 + f2*g0 + 19*(f3*g4 + f4*g3)   -> r13:r12
 *   h3 = f0*g3 + f1*g2 + f2*g1 + f3*g0 + 19*f4*g4     -> r15:r14
 *   h4 = f0*g4 + f1*g3 + f2*g2 + f3*g1 + f4*g0        -> rbp:rbx
 * then carries them down to five limbs masked with REDMASK51 and stores
 * them to r.  r may alias f or g: r is written only after all loads.
 *
 * The frame is (rsp & 31) + 96 bytes; its size is kept at 0(%rsp) so the
 * epilogue can undo it.  Callee-saved registers live at 8..48(%rsp),
 * 19*f3 at 64(%rsp) and 19*f4 at 72(%rsp).
 *
 * REDMASK51 is loaded RIP-relative so that the routine is position
 * independent (an absolute reference cannot be used in PIE/shared
 * objects or on Mach-O x86-64).
 */
.text
.p2align 5
.globl _fe51_mul
.globl fe51_mul
_fe51_mul:
fe51_mul:

/* prologue: allocate frame, save callee-saved registers */
mov %rsp,%r11
and $31,%r11
add $96,%r11
sub %r11,%rsp
movq %r11,0(%rsp)
movq %r12,8(%rsp)
movq %r13,16(%rsp)
movq %r14,24(%rsp)
movq %r15,32(%rsp)
movq %rbx,40(%rsp)
movq %rbp,48(%rsp)
movq %rdi,56(%rsp)

mov %rdx,%rcx

/* h0 = (19*f3)*g2 ; keep 19*f3 at 64(%rsp) */
movq 24(%rsi),%rdx
imulq $19,%rdx,%rax
movq %rax,64(%rsp)
mulq 16(%rcx)
mov %rax,%r8
mov %rdx,%r9

/* h0 += (19*f4)*g1 ; keep 19*f4 at 72(%rsp) */
movq 32(%rsi),%rdx
imulq $19,%rdx,%rax
movq %rax,72(%rsp)
mulq 8(%rcx)
add %rax,%r8
adc %rdx,%r9

/* h0 += f0*g0 */
movq 0(%rsi),%rax
mulq 0(%rcx)
add %rax,%r8
adc %rdx,%r9

/* h1 = f0*g1 */
movq 0(%rsi),%rax
mulq 8(%rcx)
mov %rax,%r10
mov %rdx,%r11

/* h2 = f0*g2 */
movq 0(%rsi),%rax
mulq 16(%rcx)
mov %rax,%r12
mov %rdx,%r13

/* h3 = f0*g3 */
movq 0(%rsi),%rax
mulq 24(%rcx)
mov %rax,%r14
mov %rdx,%r15

/* h4 = f0*g4 */
movq 0(%rsi),%rax
mulq 32(%rcx)
mov %rax,%rbx
mov %rdx,%rbp

/* h1 += f1*g0 */
movq 8(%rsi),%rax
mulq 0(%rcx)
add %rax,%r10
adc %rdx,%r11

/* h2 += f1*g1 */
movq 8(%rsi),%rax
mulq 8(%rcx)
add %rax,%r12
adc %rdx,%r13

/* h3 += f1*g2 */
movq 8(%rsi),%rax
mulq 16(%rcx)
add %rax,%r14
adc %rdx,%r15

/* h4 += f1*g3 */
movq 8(%rsi),%rax
mulq 24(%rcx)
add %rax,%rbx
adc %rdx,%rbp

/* h0 += (19*f1)*g4 */
movq 8(%rsi),%rdx
imulq $19,%rdx,%rax
mulq 32(%rcx)
add %rax,%r8
adc %rdx,%r9

/* h2 += f2*g0 */
movq 16(%rsi),%rax
mulq 0(%rcx)
add %rax,%r12
adc %rdx,%r13

/* h3 += f2*g1 */
movq 16(%rsi),%rax
mulq 8(%rcx)
add %rax,%r14
adc %rdx,%r15

/* h4 += f2*g2 */
movq 16(%rsi),%rax
mulq 16(%rcx)
add %rax,%rbx
adc %rdx,%rbp

/* h0 += (19*f2)*g3 */
movq 16(%rsi),%rdx
imulq $19,%rdx,%rax
mulq 24(%rcx)
add %rax,%r8
adc %rdx,%r9

/* h1 += (19*f2)*g4 */
movq 16(%rsi),%rdx
imulq $19,%rdx,%rax
mulq 32(%rcx)
add %rax,%r10
adc %rdx,%r11

/* h3 += f3*g0 */
movq 24(%rsi),%rax
mulq 0(%rcx)
add %rax,%r14
adc %rdx,%r15

/* h4 += f3*g1 */
movq 24(%rsi),%rax
mulq 8(%rcx)
add %rax,%rbx
adc %rdx,%rbp

/* h1 += (19*f3)*g3 */
movq 64(%rsp),%rax
mulq 24(%rcx)
add %rax,%r10
adc %rdx,%r11

/* h2 += (19*f3)*g4 */
movq 64(%rsp),%rax
mulq 32(%rcx)
add %rax,%r12
adc %rdx,%r13

/* h4 += f4*g0 */
movq 32(%rsi),%rax
mulq 0(%rcx)
add %rax,%rbx
adc %rdx,%rbp

/* h1 += (19*f4)*g2 */
movq 72(%rsp),%rax
mulq 16(%rcx)
add %rax,%r10
adc %rdx,%r11

/* h2 += (19*f4)*g3 */
movq 72(%rsp),%rax
mulq 24(%rcx)
add %rax,%r12
adc %rdx,%r13

/* h3 += (19*f4)*g4 */
movq 72(%rsp),%rax
mulq 32(%rcx)
add %rax,%r14
adc %rdx,%r15

/* split each 128-bit sum at bit 51 and push the high part one limb up */
movq REDMASK51(%rip),%rsi
shld $13,%r8,%r9
and %rsi,%r8
shld $13,%r10,%r11
and %rsi,%r10
add %r9,%r10
shld $13,%r12,%r13
and %rsi,%r12
add %r11,%r12
shld $13,%r14,%r15
and %rsi,%r14
add %r13,%r14
shld $13,%rbx,%rbp
and %rsi,%rbx
add %r15,%rbx

/* the top carry wraps around to limb 0, multiplied by 19 */
imulq $19,%rbp,%rdx
add %rdx,%r8

/* one sequential carry pass over the limbs */
mov %r8,%rdx
shr $51,%rdx
add %r10,%rdx
mov %rdx,%rcx
shr $51,%rdx
and %rsi,%r8
add %r12,%rdx
mov %rdx,%r9
shr $51,%rdx
and %rsi,%rcx
add %r14,%rdx
mov %rdx,%rax
shr $51,%rdx
and %rsi,%r9
add %rbx,%rdx
mov %rdx,%r10
shr $51,%rdx
and %rsi,%rax
imulq $19,%rdx,%rdx
add %rdx,%r8
and %rsi,%r10

/* store the result */
movq %r8,0(%rdi)
movq %rcx,8(%rdi)
movq %r9,16(%rdi)
movq %rax,24(%rdi)
movq %r10,32(%rdi)

/* epilogue: restore callee-saved registers and release the frame */
movq 0(%rsp),%r11
movq 8(%rsp),%r12
movq 16(%rsp),%r13
movq 24(%rsp),%r14
movq 32(%rsp),%r15
movq 40(%rsp),%rbx
movq 48(%rsp),%rbp
add %r11,%rsp
mov %rdi,%rax
mov %rsi,%rdx
ret
|
@ -0,0 +1,16 @@
|
||||
#ifndef FE51_NAMESPACE_H
|
||||
#define FE51_NAMESPACE_H
|
||||
|
||||
#define fe51 crypto_scalarmult_curve25519_sandy2x_fe51
|
||||
#define _fe51 _crypto_scalarmult_curve25519_sandy2x_fe51
|
||||
#define fe51_pack crypto_scalarmult_curve25519_sandy2x_fe51_pack
|
||||
#define _fe51_pack _crypto_scalarmult_curve25519_sandy2x_fe51_pack
|
||||
#define fe51_mul crypto_scalarmult_curve25519_sandy2x_fe51_mul
|
||||
#define _fe51_mul _crypto_scalarmult_curve25519_sandy2x_fe51_mul
|
||||
#define fe51_nsquare crypto_scalarmult_curve25519_sandy2x_fe51_nsquare
|
||||
#define _fe51_nsquare _crypto_scalarmult_curve25519_sandy2x_fe51_nsquare
|
||||
|
||||
#define fe51_invert crypto_scalarmult_curve25519_sandy2x_fe51_invert
|
||||
|
||||
#endif //ifndef FE51_NAMESPACE_H
|
||||
|
@ -0,0 +1,155 @@
|
||||
/*
|
||||
This file is adapted from amd64-51/fe25519_square.s:
|
||||
Adding loop to perform n squares.
|
||||
*/
|
||||
#include "fe51_namespace.h"
|
||||
#include "consts_namespace.h"
|
||||
/*
 * void fe51_nsquare(fe51 *r, const fe51 *x, int n)
 *
 *   rdi = r, rsi = x, edx = n.
 *
 * Squares x n times and stores the result in r.  n must be >= 1: the
 * counter is decremented before it is compared with zero, so n = 0
 * would wrap around.  r may alias x (x is fully loaded first).
 *
 * During the loop limbs 0 and 1 of the running value live in rcx and r8;
 * limbs 2..4 live in 16/24/32(%rdi), i.e. the destination doubles as
 * scratch space.  Each iteration computes
 *   h0 = f0^2    + 38*f1*f4 + 38*f2*f3    -> r10:r9
 *   h1 = 2*f0*f1 + 38*f2*f4 + 19*f3^2     -> r12:r11
 *   h2 = 2*f0*f2 + f1^2     + 38*f3*f4    -> r14:r13
 *   h3 = 2*f0*f3 + 2*f1*f2  + 19*f4^2     -> rbx:r15
 *   h4 = 2*f0*f4 + 2*f1*f3  + f2^2        -> rbp:rcx
 * and carries them down to limbs masked with REDMASK51.
 *
 * The frame is (rsp & 31) + 64 bytes; its size is kept at 0(%rsp) and the
 * callee-saved registers at 8..48(%rsp).
 *
 * Two fixes relative to the imported code:
 *  - n is an int, so only edx is defined by the calling convention; it is
 *    sign-extended into the 64-bit counter instead of copying all of rdx.
 *  - REDMASK51 is loaded RIP-relative so the routine is position
 *    independent.
 */
.p2align 5
.global _fe51_nsquare
.global fe51_nsquare
_fe51_nsquare:
fe51_nsquare:

/* prologue: allocate frame, save callee-saved registers */
mov %rsp,%r11
and $31,%r11
add $64,%r11
sub %r11,%rsp
movq %r11,0(%rsp)
movq %r12,8(%rsp)
movq %r13,16(%rsp)
movq %r14,24(%rsp)
movq %r15,32(%rsp)
movq %rbx,40(%rsp)
movq %rbp,48(%rsp)

/* load x: f0 -> rcx, f1 -> r8, f2..f4 -> destination slots */
movq 0(%rsi),%rcx
movq 8(%rsi),%r8
movq 16(%rsi),%r9
movq 24(%rsi),%rax
movq 32(%rsi),%rsi
movq %r9,16(%rdi)
movq %rax,24(%rdi)
movq %rsi,32(%rdi)

/* loop counter = (int) n */
movslq %edx,%rsi

._loop:
sub $1,%rsi

/* h0 = f0^2 ; afterwards rcx = 2*f0 */
mov %rcx,%rax
mul %rcx
add %rcx,%rcx
mov %rax,%r9
mov %rdx,%r10

/* h1 = 2*f0*f1 */
mov %rcx,%rax
mul %r8
mov %rax,%r11
mov %rdx,%r12

/* h2 = 2*f0*f2 */
mov %rcx,%rax
mulq 16(%rdi)
mov %rax,%r13
mov %rdx,%r14

/* h3 = 2*f0*f3 */
mov %rcx,%rax
mulq 24(%rdi)
mov %rax,%r15
mov %rdx,%rbx

/* h4 = 2*f0*f4 (rcx is reused as the low half of h4) */
mov %rcx,%rax
mulq 32(%rdi)
mov %rax,%rcx
mov %rdx,%rbp

/* h2 += f1^2 ; afterwards r8 = 2*f1 */
mov %r8,%rax
mul %r8
add %r8,%r8
add %rax,%r13
adc %rdx,%r14

/* h3 += 2*f1*f2 */
mov %r8,%rax
mulq 16(%rdi)
add %rax,%r15
adc %rdx,%rbx

/* h4 += 2*f1*f3 ; r8 becomes 38*f1 */
mov %r8,%rax
imulq $19, %r8,%r8
mulq 24(%rdi)
add %rax,%rcx
adc %rdx,%rbp

/* h0 += 38*f1*f4 */
mov %r8,%rax
mulq 32(%rdi)
add %rax,%r9
adc %rdx,%r10

/* h4 += f2^2 ; h4 complete, extract its bits >= 51 into rbp */
movq 16(%rdi),%rax
mulq 16(%rdi)
add %rax,%rcx
adc %rdx,%rbp
shld $13,%rcx,%rbp

/* h0 += 38*f2*f3 ; h0 complete */
movq 16(%rdi),%rax
imulq $38, %rax,%rax
mulq 24(%rdi)
add %rax,%r9
adc %rdx,%r10
shld $13,%r9,%r10

/* h1 += 38*f2*f4 */
movq 16(%rdi),%rax
imulq $38, %rax,%rax
mulq 32(%rdi)
add %rax,%r11
adc %rdx,%r12

/* h1 += 19*f3^2 ; h1 complete */
movq 24(%rdi),%rax
imulq $19, %rax,%rax
mulq 24(%rdi)
add %rax,%r11
adc %rdx,%r12
shld $13,%r11,%r12

/* h2 += 38*f3*f4 ; h2 complete */
movq 24(%rdi),%rax
imulq $38, %rax,%rax
mulq 32(%rdi)
add %rax,%r13
adc %rdx,%r14
shld $13,%r13,%r14

/* h3 += 19*f4^2 ; h3 complete */
movq 32(%rdi),%rax
imulq $19, %rax,%rax
mulq 32(%rdi)
add %rax,%r15
adc %rdx,%rbx
shld $13,%r15,%rbx

/* mask the low halves and push each high part one limb up */
movq REDMASK51(%rip),%rdx
and %rdx,%rcx
add %rbx,%rcx
and %rdx,%r9
and %rdx,%r11
add %r10,%r11
and %rdx,%r13
add %r12,%r13
and %rdx,%r15
add %r14,%r15

/* the top carry wraps around to limb 0, multiplied by 19 */
imulq $19, %rbp,%rbp
lea (%r9,%rbp),%r9

/* sequential carry pass; limbs 2..4 go straight back to the destination */
mov %r9,%rax
shr $51,%r9
add %r11,%r9
and %rdx,%rax
mov %r9,%r8
shr $51,%r9
add %r13,%r9
and %rdx,%r8
mov %r9,%r10
shr $51,%r9
add %r15,%r9
and %rdx,%r10
movq %r10,16(%rdi)
mov %r9,%r10
shr $51,%r9
add %rcx,%r9
and %rdx,%r10
movq %r10,24(%rdi)
mov %r9,%r10
shr $51,%r9
imulq $19, %r9,%r9
lea (%rax,%r9),%rcx
and %rdx,%r10
movq %r10,32(%rdi)

cmp $0,%rsi
jne ._loop

/* limbs 0 and 1 are written once, after the last iteration */
movq %rcx,0(%rdi)
movq %r8,8(%rdi)

/* epilogue: restore callee-saved registers and release the frame */
movq 0(%rsp),%r11
movq 8(%rsp),%r12
movq 16(%rsp),%r13
movq 24(%rsp),%r14
movq 32(%rsp),%r15
movq 40(%rsp),%rbx
movq 48(%rsp),%rbp
add %r11,%rsp
ret
|
209
src/libsodium/crypto_scalarmult/curve25519/sandy2x/fe51_pack.S
Normal file
209
src/libsodium/crypto_scalarmult/curve25519/sandy2x/fe51_pack.S
Normal file
@ -0,0 +1,209 @@
|
||||
/*
|
||||
This file is the result of merging
|
||||
amd64-51/fe25519_pack.c and amd64-51/fe25519_freeze.s.
|
||||
*/
|
||||
#include "fe51_namespace.h"
|
||||
#include "consts_namespace.h"
|
||||
/*
 * void fe51_pack(unsigned char *out, const fe51 *x)
 *
 *   rdi = out (32 bytes), rsi = x.
 *
 * Steps:
 *  1. Load the limbs into rdx, rcx, r8, r9, rsi.
 *  2. Run three carry passes; the carry out of limb 4 is multiplied by 19
 *     and added to limb 0.
 *  3. Without branching, subtract (2^51 - 19, 2^51 - 1, 2^51 - 1,
 *     2^51 - 1, 2^51 - 1) when limb 0 >= 2^51 - 19 and limbs 1..4 all
 *     equal 2^51 - 1.
 *  4. Write the five 51-bit limbs as 32 little-endian bytes.
 *
 * The frame is (rsp & 31) + 32 bytes; its size is kept at 0(%rsp) and r12
 * at 8(%rsp).
 *
 * REDMASK51 is loaded RIP-relative so that the routine is position
 * independent (an absolute reference cannot be used in PIE/shared
 * objects or on Mach-O x86-64).
 */
.p2align 5
.global _fe51_pack
.global fe51_pack
_fe51_pack:
fe51_pack:

/* prologue */
mov %rsp,%r11
and $31,%r11
add $32,%r11
sub %r11,%rsp
movq %r11,0(%rsp)
movq %r12,8(%rsp)

/* load limbs 0..4 */
movq 0(%rsi),%rdx
movq 8(%rsi),%rcx
movq 16(%rsi),%r8
movq 24(%rsi),%r9
movq 32(%rsi),%rsi

/* rax = 2^51 - 1, r10 = 2^51 - 19, r11 = pass counter */
movq REDMASK51(%rip),%rax
lea -18(%rax),%r10
mov $3,%r11

._reduceloop:
mov %rdx,%r12
shr $51,%r12
and %rax,%rdx
add %r12,%rcx
mov %rcx,%r12
shr $51,%r12
and %rax,%rcx
add %r12,%r8
mov %r8,%r12
shr $51,%r12
and %rax,%r8
add %r12,%r9
mov %r9,%r12
shr $51,%r12
and %rax,%r9
add %r12,%rsi
mov %rsi,%r12
shr $51,%r12
and %rax,%rsi
imulq $19, %r12,%r12
add %r12,%rdx
sub $1,%r11
ja ._reduceloop

/* r12 = 1 if the final subtraction is needed, else 0 (r11 is 0 here) */
mov $1,%r12
cmp %r10,%rdx
cmovl %r11,%r12
cmp %rax,%rcx
cmovne %r11,%r12
cmp %rax,%r8
cmovne %r11,%r12
cmp %rax,%r9
cmovne %r11,%r12
cmp %rax,%rsi
cmovne %r11,%r12

/* turn the flag into an all-ones/all-zeros mask and subtract */
neg %r12
and %r12,%rax
and %r12,%r10
sub %r10,%rdx
sub %rax,%rcx
sub %rax,%r8
sub %rax,%r9
sub %rax,%rsi

/* bytes 0..5: limb 0 bits 0..47 */
mov %rdx,%rax
and $0xFF,%eax
movb %al,0(%rdi)
mov %rdx,%rax
shr $8,%rax
and $0xFF,%eax
movb %al,1(%rdi)
mov %rdx,%rax
shr $16,%rax
and $0xFF,%eax
movb %al,2(%rdi)
mov %rdx,%rax
shr $24,%rax
and $0xFF,%eax
movb %al,3(%rdi)
mov %rdx,%rax
shr $32,%rax
and $0xFF,%eax
movb %al,4(%rdi)
mov %rdx,%rax
shr $40,%rax
and $0xFF,%eax
movb %al,5(%rdi)

/* byte 6: limb 0 bits 48..50 | limb 1 bits 0..4 */
mov %rdx,%rdx
shr $48,%rdx
mov %rcx,%rax
shl $3,%rax
and $0xF8,%eax
xor %rdx,%rax
movb %al,6(%rdi)

/* bytes 7..11: limb 1 bits 5..44 */
mov %rcx,%rdx
shr $5,%rdx
and $0xFF,%edx
movb %dl,7(%rdi)
mov %rcx,%rdx
shr $13,%rdx
and $0xFF,%edx
movb %dl,8(%rdi)
mov %rcx,%rdx
shr $21,%rdx
and $0xFF,%edx
movb %dl,9(%rdi)
mov %rcx,%rdx
shr $29,%rdx
and $0xFF,%edx
movb %dl,10(%rdi)
mov %rcx,%rdx
shr $37,%rdx
and $0xFF,%edx
movb %dl,11(%rdi)

/* byte 12: limb 1 bits 45..50 | limb 2 bits 0..1 */
mov %rcx,%rdx
shr $45,%rdx
mov %r8,%rcx
shl $6,%rcx
and $0xC0,%ecx
xor %rdx,%rcx
movb %cl,12(%rdi)

/* bytes 13..18: limb 2 bits 2..49 */
mov %r8,%rdx
shr $2,%rdx
and $0xFF,%edx
movb %dl,13(%rdi)
mov %r8,%rdx
shr $10,%rdx
and $0xFF,%edx
movb %dl,14(%rdi)
mov %r8,%rdx
shr $18,%rdx
and $0xFF,%edx
movb %dl,15(%rdi)
mov %r8,%rdx
shr $26,%rdx
and $0xFF,%edx
movb %dl,16(%rdi)
mov %r8,%rdx
shr $34,%rdx
and $0xFF,%edx
movb %dl,17(%rdi)
mov %r8,%rdx
shr $42,%rdx
movb %dl,18(%rdi)

/* byte 19: limb 2 bit 50 | limb 3 bits 0..6 */
mov %r8,%rdx
shr $50,%rdx
mov %r9,%rcx
shl $1,%rcx
and $0xFE,%ecx
xor %rdx,%rcx
movb %cl,19(%rdi)

/* bytes 20..24: limb 3 bits 7..46 */
mov %r9,%rdx
shr $7,%rdx
and $0xFF,%edx
movb %dl,20(%rdi)
mov %r9,%rdx
shr $15,%rdx
and $0xFF,%edx
movb %dl,21(%rdi)
mov %r9,%rdx
shr $23,%rdx
and $0xFF,%edx
movb %dl,22(%rdi)
mov %r9,%rdx
shr $31,%rdx
and $0xFF,%edx
movb %dl,23(%rdi)
mov %r9,%rdx
shr $39,%rdx
and $0xFF,%edx
movb %dl,24(%rdi)

/* byte 25: limb 3 bits 47..50 | limb 4 bits 0..3 */
mov %r9,%rdx
shr $47,%rdx
mov %rsi,%rcx
shl $4,%rcx
and $0xF0,%ecx
xor %rdx,%rcx
movb %cl,25(%rdi)

/* bytes 26..31: limb 4 bits 4..50 */
mov %rsi,%rdx
shr $4,%rdx
and $0xFF,%edx
movb %dl,26(%rdi)
mov %rsi,%rdx
shr $12,%rdx
and $0xFF,%edx
movb %dl,27(%rdi)
mov %rsi,%rdx
shr $20,%rdx
and $0xFF,%edx
movb %dl,28(%rdi)
mov %rsi,%rdx
shr $28,%rdx
and $0xFF,%edx
movb %dl,29(%rdi)
mov %rsi,%rdx
shr $36,%rdx
and $0xFF,%edx
movb %dl,30(%rdi)
mov %rsi,%rsi
shr $44,%rsi
movb %sil,31(%rdi)

/* epilogue */
movq 0(%rsp),%r11
movq 8(%rsp),%r12
add %r11,%rsp
ret
|
@ -0,0 +1,72 @@
|
||||
/*
|
||||
This file is basically ref10/fe_frombytes.h.
|
||||
*/
|
||||
|
||||
#include "fe.h"
|
||||
#include "crypto_uint64.h"
|
||||
|
||||
static crypto_uint64 load_3(const unsigned char *in)
|
||||
{
|
||||
crypto_uint64 result;
|
||||
result = (crypto_uint64) in[0];
|
||||
result |= ((crypto_uint64) in[1]) << 8;
|
||||
result |= ((crypto_uint64) in[2]) << 16;
|
||||
return result;
|
||||
}
|
||||
|
||||
static crypto_uint64 load_4(const unsigned char *in)
|
||||
{
|
||||
crypto_uint64 result;
|
||||
result = (crypto_uint64) in[0];
|
||||
result |= ((crypto_uint64) in[1]) << 8;
|
||||
result |= ((crypto_uint64) in[2]) << 16;
|
||||
result |= ((crypto_uint64) in[3]) << 24;
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
 * Unpacks the 32-byte little-endian string s into the ten limbs of h.
 *
 * Each limb is first read at its bit position (limb boundaries at bits
 * 0, 26, 51, 77, 102, 128, 153, 179, 204, 230) and shifted so that it is
 * aligned to that boundary; two carry passes then bring the odd limbs
 * down to 25 bits and the even limbs to 26 bits.
 *
 * Bit 255 of the input (the top bit of s[31]) is ignored, as RFC 7748
 * requires for X25519 u-coordinates.  Without the mask that bit would be
 * folded into h[0] as 2^255 = 19 (mod 2^255 - 19) and the result would
 * differ from other implementations for inputs that have it set.
 */
void fe_frombytes(fe h,const unsigned char *s)
{
  crypto_uint64 h0 = load_4(s);
  crypto_uint64 h1 = load_3(s + 4) << 6;
  crypto_uint64 h2 = load_3(s + 7) << 5;
  crypto_uint64 h3 = load_3(s + 10) << 3;
  crypto_uint64 h4 = load_3(s + 13) << 2;
  crypto_uint64 h5 = load_4(s + 16);
  crypto_uint64 h6 = load_3(s + 20) << 7;
  crypto_uint64 h7 = load_3(s + 23) << 5;
  crypto_uint64 h8 = load_3(s + 26) << 4;
  /* 0x7FFFFF keeps bits 232..254 and drops bit 255 */
  crypto_uint64 h9 = (load_3(s + 29) & 0x7FFFFF) << 2;
  crypto_uint64 carry0;
  crypto_uint64 carry1;
  crypto_uint64 carry2;
  crypto_uint64 carry3;
  crypto_uint64 carry4;
  crypto_uint64 carry5;
  crypto_uint64 carry6;
  crypto_uint64 carry7;
  crypto_uint64 carry8;
  crypto_uint64 carry9;

  /* odd limbs: keep 25 bits, carry into the next limb (h9 wraps to h0 times 19) */
  carry9 = h9 >> 25; h0 += carry9 * 19; h9 &= 0x1FFFFFF;
  carry1 = h1 >> 25; h2 += carry1; h1 &= 0x1FFFFFF;
  carry3 = h3 >> 25; h4 += carry3; h3 &= 0x1FFFFFF;
  carry5 = h5 >> 25; h6 += carry5; h5 &= 0x1FFFFFF;
  carry7 = h7 >> 25; h8 += carry7; h7 &= 0x1FFFFFF;

  /* even limbs: keep 26 bits, carry into the next limb */
  carry0 = h0 >> 26; h1 += carry0; h0 &= 0x3FFFFFF;
  carry2 = h2 >> 26; h3 += carry2; h2 &= 0x3FFFFFF;
  carry4 = h4 >> 26; h5 += carry4; h4 &= 0x3FFFFFF;
  carry6 = h6 >> 26; h7 += carry6; h6 &= 0x3FFFFFF;
  carry8 = h8 >> 26; h9 += carry8; h8 &= 0x3FFFFFF;

  h[0] = h0;
  h[1] = h1;
  h[2] = h2;
  h[3] = h3;
  h[4] = h4;
  h[5] = h5;
  h[6] = h6;
  h[7] = h7;
  h[8] = h8;
  h[9] = h9;
}
|
1422
src/libsodium/crypto_scalarmult/curve25519/sandy2x/ladder.S
Normal file
1422
src/libsodium/crypto_scalarmult/curve25519/sandy2x/ladder.S
Normal file
File diff suppressed because it is too large
Load Diff
10
src/libsodium/crypto_scalarmult/curve25519/sandy2x/ladder.h
Normal file
10
src/libsodium/crypto_scalarmult/curve25519/sandy2x/ladder.h
Normal file
@ -0,0 +1,10 @@
|
||||
#ifndef LADDER_H
|
||||
#define LADDER_H
|
||||
|
||||
#include "fe.h"
|
||||
#include "ladder_namespace.h"
|
||||
|
||||
extern void ladder(fe *, const unsigned char *);
|
||||
|
||||
#endif //ifndef LADDER_H
|
||||
|
1277
src/libsodium/crypto_scalarmult/curve25519/sandy2x/ladder_base.S
Normal file
1277
src/libsodium/crypto_scalarmult/curve25519/sandy2x/ladder_base.S
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,10 @@
|
||||
#ifndef LADDER_BASE_H
|
||||
#define LADDER_BASE_H
|
||||
|
||||
#include "fe.h"
|
||||
#include "ladder_base_namespace.h"
|
||||
|
||||
extern void ladder_base(fe *, const unsigned char *);
|
||||
|
||||
#endif //ifndef LADDER_BASE_H
|
||||
|
@ -0,0 +1,8 @@
|
||||
#ifndef LADDER_BASE_NAMESPACE_H
|
||||
#define LADDER_BASE_NAMESPACE_H
|
||||
|
||||
#define ladder_base crypto_scalarmult_curve25519_sandy2x_ladder_base
|
||||
#define _ladder_base _crypto_scalarmult_curve25519_sandy2x_ladder_base
|
||||
|
||||
#endif //ifndef LADDER_BASE_NAMESPACE_H
|
||||
|
@ -0,0 +1,8 @@
|
||||
#ifndef LADDER_NAMESPACE_H
|
||||
#define LADDER_NAMESPACE_H
|
||||
|
||||
#define ladder crypto_scalarmult_curve25519_sandy2x_ladder
|
||||
#define _ladder _crypto_scalarmult_curve25519_sandy2x_ladder
|
||||
|
||||
#endif //ifndef LADDER_NAMESPACE_H
|
||||
|
@ -0,0 +1,57 @@
|
||||
/*
|
||||
This file is adapted from ref10/scalarmult.c:
|
||||
The code for the Montgomery ladder is replaced by the ladder assembly function;
|
||||
Inversion is done in the same way as amd64-51/.
|
||||
(fe is first converted into fe51 after the Montgomery ladder)
|
||||
*/
|
||||
|
||||
#include "crypto_scalarmult.h"
|
||||
|
||||
#include "fe.h"
|
||||
#include "fe51.h"
|
||||
#include "ladder.h"
|
||||
|
||||
#define x1 var[0]
|
||||
#define x2 var[1]
|
||||
#define z2 var[2]
|
||||
|
||||
int crypto_scalarmult(unsigned char *q,
|
||||
const unsigned char *n,
|
||||
const unsigned char *p)
|
||||
{
|
||||
unsigned char e[32];
|
||||
unsigned int i;
|
||||
|
||||
fe var[3];
|
||||
|
||||
fe51 x_51;
|
||||
fe51 z_51;
|
||||
|
||||
for (i = 0;i < 32;++i) e[i] = n[i];
|
||||
e[0] &= 248;
|
||||
e[31] &= 127;
|
||||
e[31] |= 64;
|
||||
|
||||
fe_frombytes(x1, p);
|
||||
|
||||
ladder(var, e);
|
||||
|
||||
z_51.v[0] = (z2[1] << 26) + z2[0];
|
||||
z_51.v[1] = (z2[3] << 26) + z2[2];
|
||||
z_51.v[2] = (z2[5] << 26) + z2[4];
|
||||
z_51.v[3] = (z2[7] << 26) + z2[6];
|
||||
z_51.v[4] = (z2[9] << 26) + z2[8];
|
||||
|
||||
x_51.v[0] = (x2[1] << 26) + x2[0];
|
||||
x_51.v[1] = (x2[3] << 26) + x2[2];
|
||||
x_51.v[2] = (x2[5] << 26) + x2[4];
|
||||
x_51.v[3] = (x2[7] << 26) + x2[6];
|
||||
x_51.v[4] = (x2[9] << 26) + x2[8];
|
||||
|
||||
fe51_invert(&z_51, &z_51);
|
||||
fe51_mul(&x_51, &x_51, &z_51);
|
||||
fe51_pack(q, &x_51);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user