1
mirror of https://github.com/jedisct1/libsodium.git synced 2024-12-20 10:37:24 -07:00

Import the raw Sandy2x curve25519 implementation

This commit is contained in:
Frank Denis 2015-11-03 12:39:51 +01:00
parent 9623e58c03
commit 7e00ec1a9b
18 changed files with 3636 additions and 0 deletions

View File

@ -0,0 +1,44 @@
#include "crypto_scalarmult.h"
#include "fe.h"
#include "fe51.h"
#include "ladder_base.h"
#define x2 var[0]
#define z2 var[1]
/*
 * Scalar multiplication driven by ladder_base().
 *
 * q: receives the 32-byte packed result.
 * n: 32-byte scalar; it is copied and clamped locally, the caller's
 *    buffer is never modified.
 *
 * ladder_base() leaves the projective pair in var[0] (x2) and var[1] (z2);
 * the result written to q is x2 * z2^(2^255 - 21), packed by fe51_pack().
 * Always returns 0.
 */
int crypto_scalarmult_base(unsigned char *q,const unsigned char *n)
{
  unsigned char e[32];
  unsigned int k;
  fe var[3];
  fe51 x_51;
  fe51 z_51;

  /* Clamp a private copy of the scalar. */
  for (k = 0; k < 32; ++k) {
    e[k] = n[k];
  }
  e[0] &= 248;
  e[31] = (unsigned char) ((e[31] & 127) | 64);

  ladder_base(var, e);

  /* Merge each pair of adjacent fe limbs into one fe51 limb: lo + (hi << 26). */
  for (k = 0; k < 5; ++k) {
    z_51.v[k] = (z2[2 * k + 1] << 26) + z2[2 * k];
    x_51.v[k] = (x2[2 * k + 1] << 26) + x2[2 * k];
  }

  fe51_invert(&z_51, &z_51);
  fe51_mul(&x_51, &x_51, &z_51);
  fe51_pack(q, &x_51);
  return 0;
}

View File

@ -0,0 +1,38 @@
/*
   Constant table for the sandy2x curve25519 code.
   REDMASK51 is from amd64-51/consts.s.

   Every label below is renamed by consts_namespace.h, exported with .globl
   and placed in .data.  The table starts on a 16-byte boundary (.p2align 4);
   every entry except REDMASK51 is two 64-bit words (16 bytes), so each of
   those entries stays 16-byte aligned.  REDMASK51 is a single 64-bit word.
*/
#include "consts_namespace.h"
.data
.globl v0_0
.globl v1_0
.globl v2_1
.globl v2_2
.globl v9_0
.globl v9_9
.globl v19_19
.globl v38_1
.globl v38_19
.globl v38_38
.globl v121666_121666
.globl m25
.globl m26
.globl subc0
.globl subc2
.globl REDMASK51
.p2align 4
/* vA_B holds the pair of 64-bit words {A, B}. */
v0_0: .quad 0, 0
v1_0: .quad 1, 0
v2_1: .quad 2, 1
v2_2: .quad 2, 2
v9_0: .quad 9, 0
v9_9: .quad 9, 9
v19_19: .quad 19, 19
v38_1: .quad 38, 1
v38_19: .quad 38, 19
v38_38: .quad 38, 38
v121666_121666: .quad 121666, 121666
/* m25 = 2^25 - 1 and m26 = 2^26 - 1, each stored in both words. */
m25: .quad 33554431, 33554431
m26: .quad 67108863, 67108863
/*
   subc0 = { 2*(2^26 - 19), 2*(2^25 - 1) }
   subc2 = { 2*(2^26 - 1),  2*(2^25 - 1) }
   NOTE(review): these look like the limbs of 2*(2^255 - 19) in the 26/25-bit
   radix, presumably added before a subtraction in the ladder to keep limbs
   non-negative - confirm against the ladder source.
*/
subc0: .quad 0x07FFFFDA, 0x03FFFFFE
subc2: .quad 0x07FFFFFE, 0x03FFFFFE
/* REDMASK51 = 2^51 - 1, the mask the fe51 routines use to keep the low 51 bits of a limb. */
REDMASK51: .quad 0x0007FFFFFFFFFFFF

View File

@ -0,0 +1,22 @@
/*
   Symbol prefixing for the shared constant table: each short name used in
   the sources expands to the same name prefixed with
   crypto_scalarmult_curve25519_sandy2x_.
   This header is included from the assembly files, so it must contain
   preprocessor directives only.
*/
#ifndef CONSTS_NAMESPACE_H
#define CONSTS_NAMESPACE_H
#define v0_0 crypto_scalarmult_curve25519_sandy2x_v0_0
#define v1_0 crypto_scalarmult_curve25519_sandy2x_v1_0
#define v2_1 crypto_scalarmult_curve25519_sandy2x_v2_1
#define v2_2 crypto_scalarmult_curve25519_sandy2x_v2_2
#define v9_0 crypto_scalarmult_curve25519_sandy2x_v9_0
#define v9_9 crypto_scalarmult_curve25519_sandy2x_v9_9
#define v19_19 crypto_scalarmult_curve25519_sandy2x_v19_19
#define v38_1 crypto_scalarmult_curve25519_sandy2x_v38_1
#define v38_19 crypto_scalarmult_curve25519_sandy2x_v38_19
#define v38_38 crypto_scalarmult_curve25519_sandy2x_v38_38
#define v121666_121666 crypto_scalarmult_curve25519_sandy2x_v121666_121666
#define m25 crypto_scalarmult_curve25519_sandy2x_m25
#define m26 crypto_scalarmult_curve25519_sandy2x_m26
#define subc0 crypto_scalarmult_curve25519_sandy2x_subc0
#define subc2 crypto_scalarmult_curve25519_sandy2x_subc2
#define REDMASK51 crypto_scalarmult_curve25519_sandy2x_REDMASK51
#endif /* ifndef CONSTS_NAMESPACE_H */

View File

@ -0,0 +1,25 @@
/*
This file is adapted from ref10/fe.h:
All the redundant functions are removed.
*/
#ifndef FE_H
#define FE_H
#include "crypto_uint64.h"
/* Ten 64-bit limbs per field element. */
typedef crypto_uint64 fe[10];
/*
fe means field element.
Here the field is \Z/(2^255-19).
An element t, entries t[0]...t[9], represents the integer
t[0]+2^26 t[1]+2^51 t[2]+2^77 t[3]+2^102 t[4]+...+2^230 t[9].
Bounds on each t[i] vary depending on context.
*/
/* The public name is prefixed to keep it out of the global namespace. */
#define fe_frombytes crypto_scalarmult_curve25519_sandy2x_fe_frombytes
/*
fe_frombytes(h, s): decode the 32-byte little-endian string s into the
ten-limb representation h.
*/
extern void fe_frombytes(fe, const unsigned char *);
#endif

View File

@ -0,0 +1,25 @@
/*
This file is adapted from amd64-51/fe25519.h:
'fe25519' is renamed as 'fe51';
All the redundant functions are removed;
New function fe51_nsquare is introduced.
*/
#ifndef FE51_H
#define FE51_H
#include "crypto_uint64.h"
#include "fe51_namespace.h"
/*
Field element held as five 64-bit limbs in radix 2^51:
the value is v[0] + 2^51 v[1] + 2^102 v[2] + 2^153 v[3] + 2^204 v[4].
*/
typedef struct
{
crypto_uint64 v[5];
}
fe51;
/*
fe51_pack(out, x): reduce x modulo 2^255-19 and write it to out as
32 little-endian bytes.
*/
extern void fe51_pack(unsigned char *, const fe51 *);
/* fe51_mul(r, x, y): r = x * y.  r may be the same object as x or y. */
extern void fe51_mul(fe51 *, const fe51 *, const fe51 *);
/*
fe51_nsquare(r, x, n): r = x^(2^n), i.e. n successive squarings.
n must be at least 1: the loop counter is decremented before it is tested.
r may be the same object as x.
*/
extern void fe51_nsquare(fe51 *, const fe51 *, int);
/*
fe51_invert(r, x): r = x^(2^255 - 21), which is the multiplicative inverse
of x modulo 2^255-19 when x is non-zero.  r may be the same object as x.
*/
extern void fe51_invert(fe51 *, const fe51 *);
#endif

View File

@ -0,0 +1,53 @@
/*
This file is adapted from amd64-51/fe25519_invert.c:
Loops of squares are replaced by nsquares for better performance.
*/
#include "fe51.h"
#define fe51_square(x, y) fe51_nsquare(x, y, 1)
/*
 * r = x^(2^255 - 21), computed with a fixed addition chain of squarings
 * and multiplications.  The comment on each step gives the exponent of x
 * held in the destination after that step.
 *
 * x is read for the last time when x^9 is formed and r is written only by
 * the final multiplication, so r and x may be the same object.
 */
void fe51_invert(fe51 *r, const fe51 *x)
{
  fe51 x_2;      /* x^2 */
  fe51 x_9;      /* x^9 */
  fe51 x_11;     /* x^11 */
  fe51 e_5;      /* x^(2^5 - 1) */
  fe51 e_10;     /* x^(2^10 - 1) */
  fe51 e_20;     /* x^(2^20 - 1) */
  fe51 e_50;     /* x^(2^50 - 1) */
  fe51 e_100;    /* x^(2^100 - 1) */
  fe51 acc;      /* running value */

  fe51_square(&x_2, x);               /* 2 */
  fe51_nsquare(&acc, &x_2, 2);        /* 8 */
  fe51_mul(&x_9, &acc, x);            /* 9 */
  fe51_mul(&x_11, &x_9, &x_2);        /* 11 */
  fe51_square(&acc, &x_11);           /* 22 */
  fe51_mul(&e_5, &acc, &x_9);         /* 2^5 - 2^0 = 31 */

  fe51_nsquare(&acc, &e_5, 5);        /* 2^10 - 2^5 */
  fe51_mul(&e_10, &acc, &e_5);        /* 2^10 - 2^0 */

  fe51_nsquare(&acc, &e_10, 10);      /* 2^20 - 2^10 */
  fe51_mul(&e_20, &acc, &e_10);       /* 2^20 - 2^0 */

  fe51_nsquare(&acc, &e_20, 20);      /* 2^40 - 2^20 */
  fe51_mul(&acc, &acc, &e_20);        /* 2^40 - 2^0 */

  fe51_nsquare(&acc, &acc, 10);       /* 2^50 - 2^10 */
  fe51_mul(&e_50, &acc, &e_10);       /* 2^50 - 2^0 */

  fe51_nsquare(&acc, &e_50, 50);      /* 2^100 - 2^50 */
  fe51_mul(&e_100, &acc, &e_50);      /* 2^100 - 2^0 */

  fe51_nsquare(&acc, &e_100, 100);    /* 2^200 - 2^100 */
  fe51_mul(&acc, &acc, &e_100);       /* 2^200 - 2^0 */

  fe51_nsquare(&acc, &acc, 50);       /* 2^250 - 2^50 */
  fe51_mul(&acc, &acc, &e_50);        /* 2^250 - 2^0 */

  fe51_nsquare(&acc, &acc, 5);        /* 2^255 - 2^5 */
  fe51_mul(r, &acc, &x_11);           /* 2^255 - 21 */
}

View File

@ -0,0 +1,185 @@
/*
   This file is basically amd64-51/fe25519_mul.s.

   void fe51_mul(fe51 *r, const fe51 *x, const fe51 *y)
   (prototype in fe51.h; r arrives in %rdi, x in %rsi, y in %rdx)

   Computes r = x * y on five-limb radix-2^51 field elements.  Partial
   products whose weight reaches 2^255 are folded back with a factor of 19,
   then two carry passes bring the limbs back to about 51 bits.
   r is stored only after every limb of x and y has been read, so r may be
   the same object as x or y.

   Stack frame (32-byte aligned, at least 96 bytes):
      0(%rsp)       frame size, added back to %rsp on exit
      8..48(%rsp)   saved %r12 %r13 %r14 %r15 %rbx %rbp
      56(%rsp)      copy of %rdi (stored, never reloaded)
      64(%rsp)      19 * x[3]
      72(%rsp)      19 * x[4]

   Accumulators (high:low): h0 = %r9:%r8, h1 = %r11:%r10, h2 = %r13:%r12,
   h3 = %r15:%r14, h4 = %rbp:%rbx.
*/
#include "fe51_namespace.h"
#include "consts_namespace.h"
.text
.p2align 5
.globl _fe51_mul
.globl fe51_mul
_fe51_mul:
fe51_mul:
/* Prologue: carve an aligned frame and save the callee-saved registers. */
mov %rsp,%r11
and $31,%r11
add $96,%r11
sub %r11,%rsp
movq %r11,0(%rsp)
movq %r12,8(%rsp)
movq %r13,16(%rsp)
movq %r14,24(%rsp)
movq %r15,32(%rsp)
movq %rbx,40(%rsp)
movq %rbp,48(%rsp)
movq %rdi,56(%rsp)
/* mulq clobbers %rdx, so keep the y pointer in %rcx. */
mov %rdx,%rcx
/* h0 = 19*x3*y2 + 19*x4*y1 + x0*y0; 19*x3 and 19*x4 are cached on the stack. */
movq 24(%rsi),%rdx
imulq $19,%rdx,%rax
movq %rax,64(%rsp)
mulq 16(%rcx)
mov %rax,%r8
mov %rdx,%r9
movq 32(%rsi),%rdx
imulq $19,%rdx,%rax
movq %rax,72(%rsp)
mulq 8(%rcx)
add %rax,%r8
adc %rdx,%r9
movq 0(%rsi),%rax
mulq 0(%rcx)
add %rax,%r8
adc %rdx,%r9
/* x0 row: h1 = x0*y1, h2 = x0*y2, h3 = x0*y3, h4 = x0*y4. */
movq 0(%rsi),%rax
mulq 8(%rcx)
mov %rax,%r10
mov %rdx,%r11
movq 0(%rsi),%rax
mulq 16(%rcx)
mov %rax,%r12
mov %rdx,%r13
movq 0(%rsi),%rax
mulq 24(%rcx)
mov %rax,%r14
mov %rdx,%r15
movq 0(%rsi),%rax
mulq 32(%rcx)
mov %rax,%rbx
mov %rdx,%rbp
/* x1 row: h1 += x1*y0, h2 += x1*y1, h3 += x1*y2, h4 += x1*y3, h0 += 19*x1*y4. */
movq 8(%rsi),%rax
mulq 0(%rcx)
add %rax,%r10
adc %rdx,%r11
movq 8(%rsi),%rax
mulq 8(%rcx)
add %rax,%r12
adc %rdx,%r13
movq 8(%rsi),%rax
mulq 16(%rcx)
add %rax,%r14
adc %rdx,%r15
movq 8(%rsi),%rax
mulq 24(%rcx)
add %rax,%rbx
adc %rdx,%rbp
movq 8(%rsi),%rdx
imulq $19,%rdx,%rax
mulq 32(%rcx)
add %rax,%r8
adc %rdx,%r9
/* x2 row: h2 += x2*y0, h3 += x2*y1, h4 += x2*y2, h0 += 19*x2*y3, h1 += 19*x2*y4. */
movq 16(%rsi),%rax
mulq 0(%rcx)
add %rax,%r12
adc %rdx,%r13
movq 16(%rsi),%rax
mulq 8(%rcx)
add %rax,%r14
adc %rdx,%r15
movq 16(%rsi),%rax
mulq 16(%rcx)
add %rax,%rbx
adc %rdx,%rbp
movq 16(%rsi),%rdx
imulq $19,%rdx,%rax
mulq 24(%rcx)
add %rax,%r8
adc %rdx,%r9
movq 16(%rsi),%rdx
imulq $19,%rdx,%rax
mulq 32(%rcx)
add %rax,%r10
adc %rdx,%r11
/* x3 row: h3 += x3*y0, h4 += x3*y1, h1 += 19*x3*y3, h2 += 19*x3*y4. */
movq 24(%rsi),%rax
mulq 0(%rcx)
add %rax,%r14
adc %rdx,%r15
movq 24(%rsi),%rax
mulq 8(%rcx)
add %rax,%rbx
adc %rdx,%rbp
movq 64(%rsp),%rax
mulq 24(%rcx)
add %rax,%r10
adc %rdx,%r11
movq 64(%rsp),%rax
mulq 32(%rcx)
add %rax,%r12
adc %rdx,%r13
/* x4 row: h4 += x4*y0, h1 += 19*x4*y2, h2 += 19*x4*y3, h3 += 19*x4*y4. */
movq 32(%rsi),%rax
mulq 0(%rcx)
add %rax,%rbx
adc %rdx,%rbp
movq 72(%rsp),%rax
mulq 16(%rcx)
add %rax,%r10
adc %rdx,%r11
movq 72(%rsp),%rax
mulq 24(%rcx)
add %rax,%r12
adc %rdx,%r13
movq 72(%rsp),%rax
mulq 32(%rcx)
add %rax,%r14
adc %rdx,%r15
/*
   First carry pass.  The mask is loaded RIP-relative so the object can be
   linked into position-independent code.  For each accumulator, shld moves
   bits 51 and up into the high register and the low register is masked to
   51 bits; each carry is added to the next limb and the carry out of h4 is
   multiplied by 19 and added to h0.
*/
movq REDMASK51(%rip),%rsi
shld $13,%r8,%r9
and %rsi,%r8
shld $13,%r10,%r11
and %rsi,%r10
add %r9,%r10
shld $13,%r12,%r13
and %rsi,%r12
add %r11,%r12
shld $13,%r14,%r15
and %rsi,%r14
add %r13,%r14
shld $13,%rbx,%rbp
and %rsi,%rbx
add %r15,%rbx
imulq $19,%rbp,%rdx
add %rdx,%r8
/* Second, sequential carry pass; the final carry is again folded in times 19. */
mov %r8,%rdx
shr $51,%rdx
add %r10,%rdx
mov %rdx,%rcx
shr $51,%rdx
and %rsi,%r8
add %r12,%rdx
mov %rdx,%r9
shr $51,%rdx
and %rsi,%rcx
add %r14,%rdx
mov %rdx,%rax
shr $51,%rdx
and %rsi,%r9
add %rbx,%rdx
mov %rdx,%r10
shr $51,%rdx
and %rsi,%rax
imulq $19,%rdx,%rdx
add %rdx,%r8
and %rsi,%r10
/* Store the five result limbs. */
movq %r8,0(%rdi)
movq %rcx,8(%rdi)
movq %r9,16(%rdi)
movq %rax,24(%rdi)
movq %r10,32(%rdi)
/* Epilogue: restore callee-saved registers and release the frame. */
movq 0(%rsp),%r11
movq 8(%rsp),%r12
movq 16(%rsp),%r13
movq 24(%rsp),%r14
movq 32(%rsp),%r15
movq 40(%rsp),%rbx
movq 48(%rsp),%rbp
add %r11,%rsp
/* Kept from the original; fe51.h declares this function as returning void. */
mov %rdi,%rax
mov %rsi,%rdx
ret

View File

@ -0,0 +1,16 @@
/*
   Symbol prefixing for the fe51 type and routines: each short name expands
   to the same name prefixed with crypto_scalarmult_curve25519_sandy2x_.
   The underscore-prefixed variants rename the additional _name labels that
   the assembly files export next to each plain label; fe51_invert has no
   such variant here.
   This header is included from the assembly files, so it must contain
   preprocessor directives only.
*/
#ifndef FE51_NAMESPACE_H
#define FE51_NAMESPACE_H
#define fe51 crypto_scalarmult_curve25519_sandy2x_fe51
#define _fe51 _crypto_scalarmult_curve25519_sandy2x_fe51
#define fe51_pack crypto_scalarmult_curve25519_sandy2x_fe51_pack
#define _fe51_pack _crypto_scalarmult_curve25519_sandy2x_fe51_pack
#define fe51_mul crypto_scalarmult_curve25519_sandy2x_fe51_mul
#define _fe51_mul _crypto_scalarmult_curve25519_sandy2x_fe51_mul
#define fe51_nsquare crypto_scalarmult_curve25519_sandy2x_fe51_nsquare
#define _fe51_nsquare _crypto_scalarmult_curve25519_sandy2x_fe51_nsquare
#define fe51_invert crypto_scalarmult_curve25519_sandy2x_fe51_invert
#endif /* ifndef FE51_NAMESPACE_H */

View File

@ -0,0 +1,155 @@
/*
   This file is adapted from amd64-51/fe25519_square.s:
   Adding loop to perform n squares.

   void fe51_nsquare(fe51 *r, const fe51 *x, int n)
   (prototype in fe51.h; r arrives in %rdi, x in %rsi, n in %edx)

   Computes r = x^(2^n) by squaring n times.  n must be at least 1: the
   counter is decremented before it is tested against zero.
   All five limbs of x are loaded before r is written, so r may be the same
   object as x.

   Between iterations limbs 0 and 1 live in %rcx and %r8, while limbs 2..4
   live in r[2..4] (16/24/32(%rdi)); limbs 0 and 1 are stored after the loop.

   Stack frame (32-byte aligned, at least 64 bytes):
      0(%rsp)       frame size, added back to %rsp on exit
      8..48(%rsp)   saved %r12 %r13 %r14 %r15 %rbx %rbp

   Accumulators inside the loop (high:low): h0 = %r10:%r9, h1 = %r12:%r11,
   h2 = %r14:%r13, h3 = %rbx:%r15, h4 = %rbp:%rcx.
*/
#include "fe51_namespace.h"
#include "consts_namespace.h"
.text
.p2align 5
.global _fe51_nsquare
.global fe51_nsquare
_fe51_nsquare:
fe51_nsquare:
/* Prologue: carve an aligned frame and save the callee-saved registers. */
mov %rsp,%r11
and $31,%r11
add $64,%r11
sub %r11,%rsp
movq %r11,0(%rsp)
movq %r12,8(%rsp)
movq %r13,16(%rsp)
movq %r14,24(%rsp)
movq %r15,32(%rsp)
movq %rbx,40(%rsp)
movq %rbp,48(%rsp)
/* Load x; limbs 2..4 are parked in r, limbs 0..1 stay in registers. */
movq 0(%rsi),%rcx
movq 8(%rsi),%r8
movq 16(%rsi),%r9
movq 24(%rsi),%rax
movq 32(%rsi),%rsi
movq %r9,16(%rdi)
movq %rax,24(%rdi)
movq %rsi,32(%rdi)
/*
   n is declared int: only the low 32 bits of %rdx are defined by the
   calling convention, so sign-extend %edx instead of copying all of %rdx.
*/
movslq %edx,%rsi
._loop:
sub $1,%rsi
/* h0 = x0^2; then %rcx = 2*x0 for the cross terms. */
mov %rcx,%rax
mul %rcx
add %rcx,%rcx
mov %rax,%r9
mov %rdx,%r10
/* h1 = 2*x0*x1, h2 = 2*x0*x2, h3 = 2*x0*x3, h4 = 2*x0*x4. */
mov %rcx,%rax
mul %r8
mov %rax,%r11
mov %rdx,%r12
mov %rcx,%rax
mulq 16(%rdi)
mov %rax,%r13
mov %rdx,%r14
mov %rcx,%rax
mulq 24(%rdi)
mov %rax,%r15
mov %rdx,%rbx
mov %rcx,%rax
mulq 32(%rdi)
mov %rax,%rcx
mov %rdx,%rbp
/* h2 += x1^2; then %r8 = 2*x1. */
mov %r8,%rax
mul %r8
add %r8,%r8
add %rax,%r13
adc %rdx,%r14
/* h3 += 2*x1*x2. */
mov %r8,%rax
mulq 16(%rdi)
add %rax,%r15
adc %rdx,%rbx
/* h4 += 2*x1*x3; %r8 becomes 38*x1 for the wrapped term. */
mov %r8,%rax
imulq $19, %r8,%r8
mulq 24(%rdi)
add %rax,%rcx
adc %rdx,%rbp
/* h0 += 38*x1*x4. */
mov %r8,%rax
mulq 32(%rdi)
add %rax,%r9
adc %rdx,%r10
/* h4 += x2^2; h4 is complete, split off its carry. */
movq 16(%rdi),%rax
mulq 16(%rdi)
add %rax,%rcx
adc %rdx,%rbp
shld $13,%rcx,%rbp
/* h0 += 38*x2*x3; h0 is complete, split off its carry. */
movq 16(%rdi),%rax
imulq $38, %rax,%rax
mulq 24(%rdi)
add %rax,%r9
adc %rdx,%r10
shld $13,%r9,%r10
/* h1 += 38*x2*x4 + 19*x3^2; then split off its carry. */
movq 16(%rdi),%rax
imulq $38, %rax,%rax
mulq 32(%rdi)
add %rax,%r11
adc %rdx,%r12
movq 24(%rdi),%rax
imulq $19, %rax,%rax
mulq 24(%rdi)
add %rax,%r11
adc %rdx,%r12
shld $13,%r11,%r12
/* h2 += 38*x3*x4; then split off its carry. */
movq 24(%rdi),%rax
imulq $38, %rax,%rax
mulq 32(%rdi)
add %rax,%r13
adc %rdx,%r14
shld $13,%r13,%r14
/* h3 += 19*x4^2; then split off its carry. */
movq 32(%rdi),%rax
imulq $19, %rax,%rax
mulq 32(%rdi)
add %rax,%r15
adc %rdx,%rbx
shld $13,%r15,%rbx
/*
   First carry pass.  The mask is loaded RIP-relative so the object can be
   linked into position-independent code.  Each limb is masked to 51 bits
   and receives the carry of the limb below; the carry out of h4 is
   multiplied by 19 and added to h0.
*/
movq REDMASK51(%rip),%rdx
and %rdx,%rcx
add %rbx,%rcx
and %rdx,%r9
and %rdx,%r11
add %r10,%r11
and %rdx,%r13
add %r12,%r13
and %rdx,%r15
add %r14,%r15
imulq $19, %rbp,%rbp
lea (%r9,%rbp),%r9
/* Second, sequential carry pass; limbs 2..4 go back to r, limbs 0..1 to %rcx/%r8. */
mov %r9,%rax
shr $51,%r9
add %r11,%r9
and %rdx,%rax
mov %r9,%r8
shr $51,%r9
add %r13,%r9
and %rdx,%r8
mov %r9,%r10
shr $51,%r9
add %r15,%r9
and %rdx,%r10
movq %r10,16(%rdi)
mov %r9,%r10
shr $51,%r9
add %rcx,%r9
and %rdx,%r10
movq %r10,24(%rdi)
mov %r9,%r10
shr $51,%r9
imulq $19, %r9,%r9
lea (%rax,%r9),%rcx
and %rdx,%r10
movq %r10,32(%rdi)
cmp $0,%rsi
jne ._loop
/* Store the two limbs that were kept in registers. */
movq %rcx,0(%rdi)
movq %r8,8(%rdi)
/* Epilogue: restore callee-saved registers and release the frame. */
movq 0(%rsp),%r11
movq 8(%rsp),%r12
movq 16(%rsp),%r13
movq 24(%rsp),%r14
movq 32(%rsp),%r15
movq 40(%rsp),%rbx
movq 48(%rsp),%rbp
add %r11,%rsp
ret

View File

@ -0,0 +1,209 @@
/*
   This file is the result of merging
   amd64-51/fe25519_pack.c and amd64-51/fe25519_freeze.s.

   void fe51_pack(unsigned char *out, const fe51 *x)
   (prototype in fe51.h; out arrives in %rdi, x in %rsi)

   Reduces x modulo 2^255-19 and writes it to out as 32 little-endian bytes.
   x itself is not modified.

   Steps:
     1. three carry passes over the five 51-bit limbs, the carry out of the
        top limb being multiplied by 19 and added to limb 0;
     2. one conditional subtraction of 2^255-19, selected with cmov and a
        mask rather than a branch;
     3. serialisation of the 255 bits, least significant byte first.

   Stack frame (32-byte aligned, at least 32 bytes):
      0(%rsp)   frame size, added back to %rsp on exit
      8(%rsp)   saved %r12

   Limbs 0..4 are held in %rdx, %rcx, %r8, %r9, %rsi.
*/
#include "fe51_namespace.h"
#include "consts_namespace.h"
.text
.p2align 5
.global _fe51_pack
.global fe51_pack
_fe51_pack:
fe51_pack:
mov %rsp,%r11
and $31,%r11
add $32,%r11
sub %r11,%rsp
movq %r11,0(%rsp)
movq %r12,8(%rsp)
movq 0(%rsi),%rdx
movq 8(%rsi),%rcx
movq 16(%rsi),%r8
movq 24(%rsi),%r9
movq 32(%rsi),%rsi
/*
   %rax = 2^51 - 1 (loaded RIP-relative so the object can be linked into
   position-independent code), %r10 = 2^51 - 19, %r11 = pass counter.
*/
movq REDMASK51(%rip),%rax
lea -18(%rax),%r10
mov $3,%r11
._reduceloop:
mov %rdx,%r12
shr $51,%r12
and %rax,%rdx
add %r12,%rcx
mov %rcx,%r12
shr $51,%r12
and %rax,%rcx
add %r12,%r8
mov %r8,%r12
shr $51,%r12
and %rax,%r8
add %r12,%r9
mov %r9,%r12
shr $51,%r12
and %rax,%r9
add %r12,%rsi
mov %rsi,%r12
shr $51,%r12
and %rax,%rsi
imulq $19, %r12,%r12
add %r12,%rdx
sub $1,%r11
ja ._reduceloop
/*
   %r11 is now 0.  %r12 stays 1 only if limb 0 >= 2^51 - 19 and limbs 1..4
   are all equal to 2^51 - 1, i.e. only if the value is >= 2^255 - 19.
*/
mov $1,%r12
cmp %r10,%rdx
cmovl %r11,%r12
cmp %rax,%rcx
cmovne %r11,%r12
cmp %rax,%r8
cmovne %r11,%r12
cmp %rax,%r9
cmovne %r11,%r12
cmp %rax,%rsi
cmovne %r11,%r12
/* Turn the flag into an all-ones or all-zero mask and subtract the masked modulus. */
neg %r12
and %r12,%rax
and %r12,%r10
sub %r10,%rdx
sub %rax,%rcx
sub %rax,%r8
sub %rax,%r9
sub %rax,%rsi
/* Bytes 0..5: limb 0 bits 0..47. */
mov %rdx,%rax
and $0xFF,%eax
movb %al,0(%rdi)
mov %rdx,%rax
shr $8,%rax
and $0xFF,%eax
movb %al,1(%rdi)
mov %rdx,%rax
shr $16,%rax
and $0xFF,%eax
movb %al,2(%rdi)
mov %rdx,%rax
shr $24,%rax
and $0xFF,%eax
movb %al,3(%rdi)
mov %rdx,%rax
shr $32,%rax
and $0xFF,%eax
movb %al,4(%rdi)
mov %rdx,%rax
shr $40,%rax
and $0xFF,%eax
movb %al,5(%rdi)
/* Byte 6: limb 0 bits 48..50 in the low 3 bits, limb 1 bits 0..4 above them. */
mov %rdx,%rdx
shr $48,%rdx
mov %rcx,%rax
shl $3,%rax
and $0xF8,%eax
xor %rdx,%rax
movb %al,6(%rdi)
/* Bytes 7..11: limb 1 bits 5..44. */
mov %rcx,%rdx
shr $5,%rdx
and $0xFF,%edx
movb %dl,7(%rdi)
mov %rcx,%rdx
shr $13,%rdx
and $0xFF,%edx
movb %dl,8(%rdi)
mov %rcx,%rdx
shr $21,%rdx
and $0xFF,%edx
movb %dl,9(%rdi)
mov %rcx,%rdx
shr $29,%rdx
and $0xFF,%edx
movb %dl,10(%rdi)
mov %rcx,%rdx
shr $37,%rdx
and $0xFF,%edx
movb %dl,11(%rdi)
/* Byte 12: limb 1 bits 45..50 in the low 6 bits, limb 2 bits 0..1 above them. */
mov %rcx,%rdx
shr $45,%rdx
mov %r8,%rcx
shl $6,%rcx
and $0xC0,%ecx
xor %rdx,%rcx
movb %cl,12(%rdi)
/* Bytes 13..18: limb 2 bits 2..49. */
mov %r8,%rdx
shr $2,%rdx
and $0xFF,%edx
movb %dl,13(%rdi)
mov %r8,%rdx
shr $10,%rdx
and $0xFF,%edx
movb %dl,14(%rdi)
mov %r8,%rdx
shr $18,%rdx
and $0xFF,%edx
movb %dl,15(%rdi)
mov %r8,%rdx
shr $26,%rdx
and $0xFF,%edx
movb %dl,16(%rdi)
mov %r8,%rdx
shr $34,%rdx
and $0xFF,%edx
movb %dl,17(%rdi)
mov %r8,%rdx
shr $42,%rdx
movb %dl,18(%rdi)
/* Byte 19: limb 2 bit 50 in bit 0, limb 3 bits 0..6 above it. */
mov %r8,%rdx
shr $50,%rdx
mov %r9,%rcx
shl $1,%rcx
and $0xFE,%ecx
xor %rdx,%rcx
movb %cl,19(%rdi)
/* Bytes 20..24: limb 3 bits 7..46. */
mov %r9,%rdx
shr $7,%rdx
and $0xFF,%edx
movb %dl,20(%rdi)
mov %r9,%rdx
shr $15,%rdx
and $0xFF,%edx
movb %dl,21(%rdi)
mov %r9,%rdx
shr $23,%rdx
and $0xFF,%edx
movb %dl,22(%rdi)
mov %r9,%rdx
shr $31,%rdx
and $0xFF,%edx
movb %dl,23(%rdi)
mov %r9,%rdx
shr $39,%rdx
and $0xFF,%edx
movb %dl,24(%rdi)
/* Byte 25: limb 3 bits 47..50 in the low 4 bits, limb 4 bits 0..3 above them. */
mov %r9,%rdx
shr $47,%rdx
mov %rsi,%rcx
shl $4,%rcx
and $0xF0,%ecx
xor %rdx,%rcx
movb %cl,25(%rdi)
/* Bytes 26..31: limb 4 bits 4..50. */
mov %rsi,%rdx
shr $4,%rdx
and $0xFF,%edx
movb %dl,26(%rdi)
mov %rsi,%rdx
shr $12,%rdx
and $0xFF,%edx
movb %dl,27(%rdi)
mov %rsi,%rdx
shr $20,%rdx
and $0xFF,%edx
movb %dl,28(%rdi)
mov %rsi,%rdx
shr $28,%rdx
and $0xFF,%edx
movb %dl,29(%rdi)
mov %rsi,%rdx
shr $36,%rdx
and $0xFF,%edx
movb %dl,30(%rdi)
mov %rsi,%rsi
shr $44,%rsi
movb %sil,31(%rdi)
/* Epilogue: restore %r12 and release the frame. */
movq 0(%rsp),%r11
movq 8(%rsp),%r12
add %r11,%rsp
ret

View File

@ -0,0 +1,72 @@
/*
This file is basically ref10/fe_frombytes.h.
*/
#include "fe.h"
#include "crypto_uint64.h"
/* Read the three bytes at in as a little-endian 24-bit integer. */
static crypto_uint64 load_3(const unsigned char *in)
{
  return ((crypto_uint64) in[0])
       | (((crypto_uint64) in[1]) << 8)
       | (((crypto_uint64) in[2]) << 16);
}
/* Read the four bytes at in as a little-endian 32-bit integer. */
static crypto_uint64 load_4(const unsigned char *in)
{
  return ((crypto_uint64) in[0])
       | (((crypto_uint64) in[1]) << 8)
       | (((crypto_uint64) in[2]) << 16)
       | (((crypto_uint64) in[3]) << 24);
}
/*
 * Decode the 32-byte little-endian string s into the ten-limb field
 * element h (alternating 26- and 25-bit limbs, see fe.h).
 *
 * Bit 255, the top bit of s[31], is ignored: RFC 7748 requires X25519
 * implementations to mask it, and without the mask that bit would be
 * folded into the value as 2^255 = 19 (mod 2^255-19), giving a result
 * that differs from other implementations for such inputs.
 *
 * The raw loads overlap the limb boundaries; the two interleaved carry
 * passes below (odd limbs first, then even limbs) move the excess bits
 * up so that every limb ends near its nominal width.
 */
void fe_frombytes(fe h,const unsigned char *s)
{
  crypto_uint64 h0 = load_4(s);
  crypto_uint64 h1 = load_3(s + 4) << 6;
  crypto_uint64 h2 = load_3(s + 7) << 5;
  crypto_uint64 h3 = load_3(s + 10) << 3;
  crypto_uint64 h4 = load_3(s + 13) << 2;
  crypto_uint64 h5 = load_4(s + 16);
  crypto_uint64 h6 = load_3(s + 20) << 7;
  crypto_uint64 h7 = load_3(s + 23) << 5;
  crypto_uint64 h8 = load_3(s + 26) << 4;
  /* 0x7FFFFF keeps bits 232..254 and drops bit 255. */
  crypto_uint64 h9 = (load_3(s + 29) & 0x7FFFFF) << 2;
  crypto_uint64 carry0;
  crypto_uint64 carry1;
  crypto_uint64 carry2;
  crypto_uint64 carry3;
  crypto_uint64 carry4;
  crypto_uint64 carry5;
  crypto_uint64 carry6;
  crypto_uint64 carry7;
  crypto_uint64 carry8;
  crypto_uint64 carry9;

  /* Odd (25-bit) limbs; a carry out of h9 wraps to h0 with weight 19. */
  carry9 = h9 >> 25; h0 += carry9 * 19; h9 &= 0x1FFFFFF;
  carry1 = h1 >> 25; h2 += carry1; h1 &= 0x1FFFFFF;
  carry3 = h3 >> 25; h4 += carry3; h3 &= 0x1FFFFFF;
  carry5 = h5 >> 25; h6 += carry5; h5 &= 0x1FFFFFF;
  carry7 = h7 >> 25; h8 += carry7; h7 &= 0x1FFFFFF;

  /* Even (26-bit) limbs. */
  carry0 = h0 >> 26; h1 += carry0; h0 &= 0x3FFFFFF;
  carry2 = h2 >> 26; h3 += carry2; h2 &= 0x3FFFFFF;
  carry4 = h4 >> 26; h5 += carry4; h4 &= 0x3FFFFFF;
  carry6 = h6 >> 26; h7 += carry6; h6 &= 0x3FFFFFF;
  carry8 = h8 >> 26; h9 += carry8; h8 &= 0x3FFFFFF;

  h[0] = h0;
  h[1] = h1;
  h[2] = h2;
  h[3] = h3;
  h[4] = h4;
  h[5] = h5;
  h[6] = h6;
  h[7] = h7;
  h[8] = h8;
  h[9] = h9;
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,10 @@
#ifndef LADDER_H
#define LADDER_H
#include "fe.h"
#include "ladder_namespace.h"
/*
   ladder(var, e): assembly ladder routine.
   As used by crypto_scalarmult, var is an array of three field elements:
   var[0] holds the input x-coordinate (x1) on entry, and var[1] / var[2]
   are read afterwards as the projective result x2 / z2.  e is the clamped
   32-byte scalar.
*/
extern void ladder(fe *, const unsigned char *);
#endif /* ifndef LADDER_H */

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,10 @@
#ifndef LADDER_BASE_H
#define LADDER_BASE_H
#include "fe.h"
#include "ladder_base_namespace.h"
/*
   ladder_base(var, e): assembly ladder routine for crypto_scalarmult_base.
   As used there, var is an array of three field elements of which var[0]
   and var[1] are read afterwards as the projective result x2 / z2.
   e is the clamped 32-byte scalar.
*/
extern void ladder_base(fe *, const unsigned char *);
#endif /* ifndef LADDER_BASE_H */

View File

@ -0,0 +1,8 @@
/*
   Symbol prefixing for ladder_base: the plain and the underscore-prefixed
   names both expand to the crypto_scalarmult_curve25519_sandy2x_ form.
*/
#ifndef LADDER_BASE_NAMESPACE_H
#define LADDER_BASE_NAMESPACE_H
#define ladder_base crypto_scalarmult_curve25519_sandy2x_ladder_base
#define _ladder_base _crypto_scalarmult_curve25519_sandy2x_ladder_base
#endif /* ifndef LADDER_BASE_NAMESPACE_H */

View File

@ -0,0 +1,8 @@
/*
   Symbol prefixing for ladder: the plain and the underscore-prefixed names
   both expand to the crypto_scalarmult_curve25519_sandy2x_ form.
*/
#ifndef LADDER_NAMESPACE_H
#define LADDER_NAMESPACE_H
#define ladder crypto_scalarmult_curve25519_sandy2x_ladder
#define _ladder _crypto_scalarmult_curve25519_sandy2x_ladder
#endif /* ifndef LADDER_NAMESPACE_H */

View File

@ -0,0 +1,57 @@
/*
This file is adapted from ref10/scalarmult.c:
The code for the Montgomery ladder is replaced by the ladder assembly function;
Inversion is done in the same way as amd64-51/.
(fe is first converted into fe51 after the Montgomery ladder)
*/
#include "crypto_scalarmult.h"
#include "fe.h"
#include "fe51.h"
#include "ladder.h"
#define x1 var[0]
#define x2 var[1]
#define z2 var[2]
/*
 * Scalar multiplication driven by ladder().
 *
 * q: receives the 32-byte packed result.
 * n: 32-byte scalar; it is copied and clamped locally, the caller's
 *    buffer is never modified.
 * p: 32-byte encoded input x-coordinate, decoded into var[0] (x1).
 *
 * ladder() leaves the projective pair in var[1] (x2) and var[2] (z2);
 * the result written to q is x2 * z2^(2^255 - 21), packed by fe51_pack().
 * Always returns 0.
 */
int crypto_scalarmult(unsigned char *q,
                      const unsigned char *n,
                      const unsigned char *p)
{
  unsigned char e[32];
  unsigned int k;
  fe var[3];
  fe51 x_51;
  fe51 z_51;

  /* Clamp a private copy of the scalar. */
  for (k = 0; k < 32; ++k) {
    e[k] = n[k];
  }
  e[0] &= 248;
  e[31] = (unsigned char) ((e[31] & 127) | 64);

  fe_frombytes(x1, p);
  ladder(var, e);

  /* Merge each pair of adjacent fe limbs into one fe51 limb: lo + (hi << 26). */
  for (k = 0; k < 5; ++k) {
    z_51.v[k] = (z2[2 * k + 1] << 26) + z2[2 * k];
    x_51.v[k] = (x2[2 * k + 1] << 26) + x2[2 * k];
  }

  fe51_invert(&z_51, &z_51);
  fe51_mul(&x_51, &x_51, &z_51);
  fe51_pack(q, &x_51);
  return 0;
}