crypto: x86/aesni-xts - deduplicate aesni_xts_enc() and aesni_xts_dec()
Since aesni_xts_enc() and aesni_xts_dec() are very similar, generate them from a macro that's passed an argument enc=1 or enc=0. This reduces the length of aesni-intel_asm.S by 112 lines while still producing the exact same object file in both 32-bit and 64-bit mode.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
This commit is contained in:
parent
1d27e1f5c8
commit
ea9459ef36
@ -2825,28 +2825,24 @@ SYM_FUNC_END(aesni_ctr_enc)
|
||||
.previous
|
||||
|
||||
/*
|
||||
* _aesni_gf128mul_x_ble: internal ABI
|
||||
* Multiply in GF(2^128) for XTS IVs
|
||||
* _aesni_gf128mul_x_ble: Multiply in GF(2^128) for XTS IVs
|
||||
* input:
|
||||
* IV: current IV
|
||||
* GF128MUL_MASK == mask with 0x87 and 0x01
|
||||
* output:
|
||||
* IV: next IV
|
||||
* changed:
|
||||
* CTR: == temporary value
|
||||
* KEY: == temporary value
|
||||
*/
|
||||
#define _aesni_gf128mul_x_ble() \
|
||||
pshufd $0x13, IV, KEY; \
|
||||
paddq IV, IV; \
|
||||
psrad $31, KEY; \
|
||||
pand GF128MUL_MASK, KEY; \
|
||||
pxor KEY, IV;
|
||||
.macro _aesni_gf128mul_x_ble
|
||||
pshufd $0x13, IV, KEY
|
||||
paddq IV, IV
|
||||
psrad $31, KEY
|
||||
pand GF128MUL_MASK, KEY
|
||||
pxor KEY, IV
|
||||
.endm
|
||||
|
||||
/*
|
||||
* void aesni_xts_enc(const struct crypto_aes_ctx *ctx, u8 *dst,
|
||||
* const u8 *src, unsigned int len, le128 *iv)
|
||||
*/
|
||||
SYM_FUNC_START(aesni_xts_enc)
|
||||
.macro _aesni_xts_crypt enc
|
||||
FRAME_BEGIN
|
||||
#ifndef __x86_64__
|
||||
pushl IVP
|
||||
@ -2865,35 +2861,46 @@ SYM_FUNC_START(aesni_xts_enc)
|
||||
movups (IVP), IV
|
||||
|
||||
mov 480(KEYP), KLEN
|
||||
.if !\enc
|
||||
add $240, KEYP
|
||||
|
||||
.Lxts_enc_loop4:
|
||||
test $15, LEN
|
||||
jz .Lxts_loop4\@
|
||||
sub $16, LEN
|
||||
.endif
|
||||
|
||||
.Lxts_loop4\@:
|
||||
sub $64, LEN
|
||||
jl .Lxts_enc_1x
|
||||
jl .Lxts_1x\@
|
||||
|
||||
movdqa IV, STATE1
|
||||
movdqu 0x00(INP), IN
|
||||
pxor IN, STATE1
|
||||
movdqu IV, 0x00(OUTP)
|
||||
|
||||
_aesni_gf128mul_x_ble()
|
||||
_aesni_gf128mul_x_ble
|
||||
movdqa IV, STATE2
|
||||
movdqu 0x10(INP), IN
|
||||
pxor IN, STATE2
|
||||
movdqu IV, 0x10(OUTP)
|
||||
|
||||
_aesni_gf128mul_x_ble()
|
||||
_aesni_gf128mul_x_ble
|
||||
movdqa IV, STATE3
|
||||
movdqu 0x20(INP), IN
|
||||
pxor IN, STATE3
|
||||
movdqu IV, 0x20(OUTP)
|
||||
|
||||
_aesni_gf128mul_x_ble()
|
||||
_aesni_gf128mul_x_ble
|
||||
movdqa IV, STATE4
|
||||
movdqu 0x30(INP), IN
|
||||
pxor IN, STATE4
|
||||
movdqu IV, 0x30(OUTP)
|
||||
|
||||
.if \enc
|
||||
call _aesni_enc4
|
||||
.else
|
||||
call _aesni_dec4
|
||||
.endif
|
||||
|
||||
movdqu 0x00(OUTP), IN
|
||||
pxor IN, STATE1
|
||||
@ -2911,17 +2918,17 @@ SYM_FUNC_START(aesni_xts_enc)
|
||||
pxor IN, STATE4
|
||||
movdqu STATE4, 0x30(OUTP)
|
||||
|
||||
_aesni_gf128mul_x_ble()
|
||||
_aesni_gf128mul_x_ble
|
||||
|
||||
add $64, INP
|
||||
add $64, OUTP
|
||||
test LEN, LEN
|
||||
jnz .Lxts_enc_loop4
|
||||
jnz .Lxts_loop4\@
|
||||
|
||||
.Lxts_enc_ret_iv:
|
||||
.Lxts_ret_iv\@:
|
||||
movups IV, (IVP)
|
||||
|
||||
.Lxts_enc_ret:
|
||||
.Lxts_ret\@:
|
||||
#ifndef __x86_64__
|
||||
popl KLEN
|
||||
popl KEYP
|
||||
@ -2931,39 +2938,60 @@ SYM_FUNC_START(aesni_xts_enc)
|
||||
FRAME_END
|
||||
RET
|
||||
|
||||
.Lxts_enc_1x:
|
||||
.Lxts_1x\@:
|
||||
add $64, LEN
|
||||
jz .Lxts_enc_ret_iv
|
||||
jz .Lxts_ret_iv\@
|
||||
.if \enc
|
||||
sub $16, LEN
|
||||
jl .Lxts_enc_cts4
|
||||
jl .Lxts_cts4\@
|
||||
.endif
|
||||
|
||||
.Lxts_enc_loop1:
|
||||
.Lxts_loop1\@:
|
||||
movdqu (INP), STATE
|
||||
.if \enc
|
||||
pxor IV, STATE
|
||||
call _aesni_enc1
|
||||
pxor IV, STATE
|
||||
_aesni_gf128mul_x_ble()
|
||||
|
||||
test LEN, LEN
|
||||
jz .Lxts_enc_out
|
||||
|
||||
.else
|
||||
add $16, INP
|
||||
sub $16, LEN
|
||||
jl .Lxts_enc_cts1
|
||||
jl .Lxts_cts1\@
|
||||
pxor IV, STATE
|
||||
call _aesni_dec1
|
||||
.endif
|
||||
pxor IV, STATE
|
||||
_aesni_gf128mul_x_ble
|
||||
|
||||
test LEN, LEN
|
||||
jz .Lxts_out\@
|
||||
|
||||
.if \enc
|
||||
add $16, INP
|
||||
sub $16, LEN
|
||||
jl .Lxts_cts1\@
|
||||
.endif
|
||||
|
||||
movdqu STATE, (OUTP)
|
||||
add $16, OUTP
|
||||
jmp .Lxts_enc_loop1
|
||||
jmp .Lxts_loop1\@
|
||||
|
||||
.Lxts_enc_out:
|
||||
.Lxts_out\@:
|
||||
movdqu STATE, (OUTP)
|
||||
jmp .Lxts_enc_ret_iv
|
||||
jmp .Lxts_ret_iv\@
|
||||
|
||||
.Lxts_enc_cts4:
|
||||
.if \enc
|
||||
.Lxts_cts4\@:
|
||||
movdqa STATE4, STATE
|
||||
sub $16, OUTP
|
||||
.Lxts_cts1\@:
|
||||
.else
|
||||
.Lxts_cts1\@:
|
||||
movdqa IV, STATE4
|
||||
_aesni_gf128mul_x_ble
|
||||
|
||||
.Lxts_enc_cts1:
|
||||
pxor IV, STATE
|
||||
call _aesni_dec1
|
||||
pxor IV, STATE
|
||||
.endif
|
||||
#ifndef __x86_64__
|
||||
lea .Lcts_permute_table, T1
|
||||
#else
|
||||
@ -2989,12 +3017,26 @@ SYM_FUNC_START(aesni_xts_enc)
|
||||
pblendvb IN2, IN1
|
||||
movaps IN1, STATE
|
||||
|
||||
.if \enc
|
||||
pxor IV, STATE
|
||||
call _aesni_enc1
|
||||
pxor IV, STATE
|
||||
.else
|
||||
pxor STATE4, STATE
|
||||
call _aesni_dec1
|
||||
pxor STATE4, STATE
|
||||
.endif
|
||||
|
||||
movups STATE, (OUTP)
|
||||
jmp .Lxts_enc_ret
|
||||
jmp .Lxts_ret\@
|
||||
.endm
|
||||
|
||||
/*
|
||||
* void aesni_xts_enc(const struct crypto_aes_ctx *ctx, u8 *dst,
|
||||
* const u8 *src, unsigned int len, le128 *iv)
|
||||
*/
|
||||
SYM_FUNC_START(aesni_xts_enc)
|
||||
_aesni_xts_crypt 1
|
||||
SYM_FUNC_END(aesni_xts_enc)
|
||||
|
||||
/*
|
||||
@ -3002,159 +3044,5 @@ SYM_FUNC_END(aesni_xts_enc)
|
||||
* const u8 *src, unsigned int len, le128 *iv)
|
||||
*/
|
||||
SYM_FUNC_START(aesni_xts_dec)
|
||||
FRAME_BEGIN
|
||||
#ifndef __x86_64__
|
||||
pushl IVP
|
||||
pushl LEN
|
||||
pushl KEYP
|
||||
pushl KLEN
|
||||
movl (FRAME_OFFSET+20)(%esp), KEYP # ctx
|
||||
movl (FRAME_OFFSET+24)(%esp), OUTP # dst
|
||||
movl (FRAME_OFFSET+28)(%esp), INP # src
|
||||
movl (FRAME_OFFSET+32)(%esp), LEN # len
|
||||
movl (FRAME_OFFSET+36)(%esp), IVP # iv
|
||||
movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK
|
||||
#else
|
||||
movdqa .Lgf128mul_x_ble_mask(%rip), GF128MUL_MASK
|
||||
#endif
|
||||
movups (IVP), IV
|
||||
|
||||
mov 480(KEYP), KLEN
|
||||
add $240, KEYP
|
||||
|
||||
test $15, LEN
|
||||
jz .Lxts_dec_loop4
|
||||
sub $16, LEN
|
||||
|
||||
.Lxts_dec_loop4:
|
||||
sub $64, LEN
|
||||
jl .Lxts_dec_1x
|
||||
|
||||
movdqa IV, STATE1
|
||||
movdqu 0x00(INP), IN
|
||||
pxor IN, STATE1
|
||||
movdqu IV, 0x00(OUTP)
|
||||
|
||||
_aesni_gf128mul_x_ble()
|
||||
movdqa IV, STATE2
|
||||
movdqu 0x10(INP), IN
|
||||
pxor IN, STATE2
|
||||
movdqu IV, 0x10(OUTP)
|
||||
|
||||
_aesni_gf128mul_x_ble()
|
||||
movdqa IV, STATE3
|
||||
movdqu 0x20(INP), IN
|
||||
pxor IN, STATE3
|
||||
movdqu IV, 0x20(OUTP)
|
||||
|
||||
_aesni_gf128mul_x_ble()
|
||||
movdqa IV, STATE4
|
||||
movdqu 0x30(INP), IN
|
||||
pxor IN, STATE4
|
||||
movdqu IV, 0x30(OUTP)
|
||||
|
||||
call _aesni_dec4
|
||||
|
||||
movdqu 0x00(OUTP), IN
|
||||
pxor IN, STATE1
|
||||
movdqu STATE1, 0x00(OUTP)
|
||||
|
||||
movdqu 0x10(OUTP), IN
|
||||
pxor IN, STATE2
|
||||
movdqu STATE2, 0x10(OUTP)
|
||||
|
||||
movdqu 0x20(OUTP), IN
|
||||
pxor IN, STATE3
|
||||
movdqu STATE3, 0x20(OUTP)
|
||||
|
||||
movdqu 0x30(OUTP), IN
|
||||
pxor IN, STATE4
|
||||
movdqu STATE4, 0x30(OUTP)
|
||||
|
||||
_aesni_gf128mul_x_ble()
|
||||
|
||||
add $64, INP
|
||||
add $64, OUTP
|
||||
test LEN, LEN
|
||||
jnz .Lxts_dec_loop4
|
||||
|
||||
.Lxts_dec_ret_iv:
|
||||
movups IV, (IVP)
|
||||
|
||||
.Lxts_dec_ret:
|
||||
#ifndef __x86_64__
|
||||
popl KLEN
|
||||
popl KEYP
|
||||
popl LEN
|
||||
popl IVP
|
||||
#endif
|
||||
FRAME_END
|
||||
RET
|
||||
|
||||
.Lxts_dec_1x:
|
||||
add $64, LEN
|
||||
jz .Lxts_dec_ret_iv
|
||||
|
||||
.Lxts_dec_loop1:
|
||||
movdqu (INP), STATE
|
||||
|
||||
add $16, INP
|
||||
sub $16, LEN
|
||||
jl .Lxts_dec_cts1
|
||||
|
||||
pxor IV, STATE
|
||||
call _aesni_dec1
|
||||
pxor IV, STATE
|
||||
_aesni_gf128mul_x_ble()
|
||||
|
||||
test LEN, LEN
|
||||
jz .Lxts_dec_out
|
||||
|
||||
movdqu STATE, (OUTP)
|
||||
add $16, OUTP
|
||||
jmp .Lxts_dec_loop1
|
||||
|
||||
.Lxts_dec_out:
|
||||
movdqu STATE, (OUTP)
|
||||
jmp .Lxts_dec_ret_iv
|
||||
|
||||
.Lxts_dec_cts1:
|
||||
movdqa IV, STATE4
|
||||
_aesni_gf128mul_x_ble()
|
||||
|
||||
pxor IV, STATE
|
||||
call _aesni_dec1
|
||||
pxor IV, STATE
|
||||
|
||||
#ifndef __x86_64__
|
||||
lea .Lcts_permute_table, T1
|
||||
#else
|
||||
lea .Lcts_permute_table(%rip), T1
|
||||
#endif
|
||||
add LEN, INP /* rewind input pointer */
|
||||
add $16, LEN /* # bytes in final block */
|
||||
movups (INP), IN1
|
||||
|
||||
mov T1, IVP
|
||||
add $32, IVP
|
||||
add LEN, T1
|
||||
sub LEN, IVP
|
||||
add OUTP, LEN
|
||||
|
||||
movups (T1), %xmm4
|
||||
movaps STATE, IN2
|
||||
pshufb %xmm4, STATE
|
||||
movups STATE, (LEN)
|
||||
|
||||
movups (IVP), %xmm0
|
||||
pshufb %xmm0, IN1
|
||||
pblendvb IN2, IN1
|
||||
movaps IN1, STATE
|
||||
|
||||
pxor STATE4, STATE
|
||||
call _aesni_dec1
|
||||
pxor STATE4, STATE
|
||||
|
||||
movups STATE, (OUTP)
|
||||
jmp .Lxts_dec_ret
|
||||
_aesni_xts_crypt 0
|
||||
SYM_FUNC_END(aesni_xts_dec)
|
||||
|
Loading…
Reference in New Issue
Block a user