mirror of
https://github.com/Dasharo/linux.git
synced 2026-03-06 15:25:10 -08:00
Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto update from Herbert Xu: "Here is the crypto update for 3.9: - Added accelerated implementation of crc32 using pclmulqdq. - Added test vector for fcrypt. - Added support for OMAP4/AM33XX cipher and hash. - Fixed loose crypto_user input checks. - Misc fixes" * git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (43 commits) crypto: user - ensure user supplied strings are nul-terminated crypto: user - fix empty string test in report API crypto: user - fix info leaks in report API crypto: caam - Added property fsl,sec-era in SEC4.0 device tree binding. crypto: use ERR_CAST crypto: atmel-aes - adjust duplicate test crypto: crc32-pclmul - Kill warning on x86-32 crypto: x86/twofish - assembler clean-ups: use ENTRY/ENDPROC, localize jump labels crypto: x86/sha1 - assembler clean-ups: use ENTRY/ENDPROC crypto: x86/serpent - use ENTRY/ENDPROC for assember functions and localize jump targets crypto: x86/salsa20 - assembler cleanup, use ENTRY/ENDPROC for assember functions and rename ECRYPT_* to salsa20_* crypto: x86/ghash - assembler clean-up: use ENDPROC at end of assember functions crypto: x86/crc32c - assembler clean-up: use ENTRY/ENDPROC crypto: cast6-avx: use ENTRY()/ENDPROC() for assembler functions crypto: cast5-avx: use ENTRY()/ENDPROC() for assembler functions and localize jump targets crypto: camellia-x86_64/aes-ni: use ENTRY()/ENDPROC() for assembler functions and localize jump targets crypto: blowfish-x86_64: use ENTRY()/ENDPROC() for assembler functions and localize jump targets crypto: aesni-intel - add ENDPROC statements for assembler functions crypto: x86/aes - assembler clean-ups: use ENTRY/ENDPROC, localize jump targets crypto: testmgr - add test vector for fcrypt ...
This commit is contained in:
@@ -113,7 +113,7 @@ PROPERTIES
|
||||
EXAMPLE
|
||||
crypto@300000 {
|
||||
compatible = "fsl,sec-v4.0";
|
||||
fsl,sec-era = <0x2>;
|
||||
fsl,sec-era = <2>;
|
||||
#address-cells = <1>;
|
||||
#size-cells = <1>;
|
||||
reg = <0x300000 0x10000>;
|
||||
|
||||
@@ -27,6 +27,7 @@ obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
|
||||
|
||||
obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o
|
||||
obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o
|
||||
obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o
|
||||
|
||||
aes-i586-y := aes-i586-asm_32.o aes_glue.o
|
||||
twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o
|
||||
@@ -52,3 +53,4 @@ ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
|
||||
sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
|
||||
crc32c-intel-y := crc32c-intel_glue.o
|
||||
crc32c-intel-$(CONFIG_CRYPTO_CRC32C_X86_64) += crc32c-pcl-intel-asm_64.o
|
||||
crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o
|
||||
|
||||
@@ -36,6 +36,7 @@
|
||||
.file "aes-i586-asm.S"
|
||||
.text
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/asm-offsets.h>
|
||||
|
||||
#define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words)
|
||||
@@ -219,14 +220,10 @@
|
||||
// AES (Rijndael) Encryption Subroutine
|
||||
/* void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
|
||||
|
||||
.global aes_enc_blk
|
||||
|
||||
.extern crypto_ft_tab
|
||||
.extern crypto_fl_tab
|
||||
|
||||
.align 4
|
||||
|
||||
aes_enc_blk:
|
||||
ENTRY(aes_enc_blk)
|
||||
push %ebp
|
||||
mov ctx(%esp),%ebp
|
||||
|
||||
@@ -290,18 +287,15 @@ aes_enc_blk:
|
||||
mov %r0,(%ebp)
|
||||
pop %ebp
|
||||
ret
|
||||
ENDPROC(aes_enc_blk)
|
||||
|
||||
// AES (Rijndael) Decryption Subroutine
|
||||
/* void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
|
||||
|
||||
.global aes_dec_blk
|
||||
|
||||
.extern crypto_it_tab
|
||||
.extern crypto_il_tab
|
||||
|
||||
.align 4
|
||||
|
||||
aes_dec_blk:
|
||||
ENTRY(aes_dec_blk)
|
||||
push %ebp
|
||||
mov ctx(%esp),%ebp
|
||||
|
||||
@@ -365,3 +359,4 @@ aes_dec_blk:
|
||||
mov %r0,(%ebp)
|
||||
pop %ebp
|
||||
ret
|
||||
ENDPROC(aes_dec_blk)
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
|
||||
.text
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/asm-offsets.h>
|
||||
|
||||
#define R1 %rax
|
||||
@@ -49,10 +50,8 @@
|
||||
#define R11 %r11
|
||||
|
||||
#define prologue(FUNC,KEY,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \
|
||||
.global FUNC; \
|
||||
.type FUNC,@function; \
|
||||
.align 8; \
|
||||
FUNC: movq r1,r2; \
|
||||
ENTRY(FUNC); \
|
||||
movq r1,r2; \
|
||||
movq r3,r4; \
|
||||
leaq KEY+48(r8),r9; \
|
||||
movq r10,r11; \
|
||||
@@ -71,14 +70,15 @@ FUNC: movq r1,r2; \
|
||||
je B192; \
|
||||
leaq 32(r9),r9;
|
||||
|
||||
#define epilogue(r1,r2,r3,r4,r5,r6,r7,r8,r9) \
|
||||
#define epilogue(FUNC,r1,r2,r3,r4,r5,r6,r7,r8,r9) \
|
||||
movq r1,r2; \
|
||||
movq r3,r4; \
|
||||
movl r5 ## E,(r9); \
|
||||
movl r6 ## E,4(r9); \
|
||||
movl r7 ## E,8(r9); \
|
||||
movl r8 ## E,12(r9); \
|
||||
ret;
|
||||
ret; \
|
||||
ENDPROC(FUNC);
|
||||
|
||||
#define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \
|
||||
movzbl r2 ## H,r5 ## E; \
|
||||
@@ -133,7 +133,7 @@ FUNC: movq r1,r2; \
|
||||
#define entry(FUNC,KEY,B128,B192) \
|
||||
prologue(FUNC,KEY,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11)
|
||||
|
||||
#define return epilogue(R8,R2,R9,R7,R5,R6,R3,R4,R11)
|
||||
#define return(FUNC) epilogue(FUNC,R8,R2,R9,R7,R5,R6,R3,R4,R11)
|
||||
|
||||
#define encrypt_round(TAB,OFFSET) \
|
||||
round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \
|
||||
@@ -151,12 +151,12 @@ FUNC: movq r1,r2; \
|
||||
|
||||
/* void aes_enc_blk(stuct crypto_tfm *tfm, u8 *out, const u8 *in) */
|
||||
|
||||
entry(aes_enc_blk,0,enc128,enc192)
|
||||
entry(aes_enc_blk,0,.Le128,.Le192)
|
||||
encrypt_round(crypto_ft_tab,-96)
|
||||
encrypt_round(crypto_ft_tab,-80)
|
||||
enc192: encrypt_round(crypto_ft_tab,-64)
|
||||
.Le192: encrypt_round(crypto_ft_tab,-64)
|
||||
encrypt_round(crypto_ft_tab,-48)
|
||||
enc128: encrypt_round(crypto_ft_tab,-32)
|
||||
.Le128: encrypt_round(crypto_ft_tab,-32)
|
||||
encrypt_round(crypto_ft_tab,-16)
|
||||
encrypt_round(crypto_ft_tab, 0)
|
||||
encrypt_round(crypto_ft_tab, 16)
|
||||
@@ -166,16 +166,16 @@ enc128: encrypt_round(crypto_ft_tab,-32)
|
||||
encrypt_round(crypto_ft_tab, 80)
|
||||
encrypt_round(crypto_ft_tab, 96)
|
||||
encrypt_final(crypto_fl_tab,112)
|
||||
return
|
||||
return(aes_enc_blk)
|
||||
|
||||
/* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in) */
|
||||
|
||||
entry(aes_dec_blk,240,dec128,dec192)
|
||||
entry(aes_dec_blk,240,.Ld128,.Ld192)
|
||||
decrypt_round(crypto_it_tab,-96)
|
||||
decrypt_round(crypto_it_tab,-80)
|
||||
dec192: decrypt_round(crypto_it_tab,-64)
|
||||
.Ld192: decrypt_round(crypto_it_tab,-64)
|
||||
decrypt_round(crypto_it_tab,-48)
|
||||
dec128: decrypt_round(crypto_it_tab,-32)
|
||||
.Ld128: decrypt_round(crypto_it_tab,-32)
|
||||
decrypt_round(crypto_it_tab,-16)
|
||||
decrypt_round(crypto_it_tab, 0)
|
||||
decrypt_round(crypto_it_tab, 16)
|
||||
@@ -185,4 +185,4 @@ dec128: decrypt_round(crypto_it_tab,-32)
|
||||
decrypt_round(crypto_it_tab, 80)
|
||||
decrypt_round(crypto_it_tab, 96)
|
||||
decrypt_final(crypto_il_tab,112)
|
||||
return
|
||||
return(aes_dec_blk)
|
||||
|
||||
@@ -1262,7 +1262,6 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
|
||||
* poly = x^128 + x^127 + x^126 + x^121 + 1
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
ENTRY(aesni_gcm_dec)
|
||||
push %r12
|
||||
push %r13
|
||||
@@ -1437,6 +1436,7 @@ _return_T_done_decrypt:
|
||||
pop %r13
|
||||
pop %r12
|
||||
ret
|
||||
ENDPROC(aesni_gcm_dec)
|
||||
|
||||
|
||||
/*****************************************************************************
|
||||
@@ -1700,10 +1700,12 @@ _return_T_done_encrypt:
|
||||
pop %r13
|
||||
pop %r12
|
||||
ret
|
||||
ENDPROC(aesni_gcm_enc)
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
.align 4
|
||||
_key_expansion_128:
|
||||
_key_expansion_256a:
|
||||
pshufd $0b11111111, %xmm1, %xmm1
|
||||
@@ -1715,6 +1717,8 @@ _key_expansion_256a:
|
||||
movaps %xmm0, (TKEYP)
|
||||
add $0x10, TKEYP
|
||||
ret
|
||||
ENDPROC(_key_expansion_128)
|
||||
ENDPROC(_key_expansion_256a)
|
||||
|
||||
.align 4
|
||||
_key_expansion_192a:
|
||||
@@ -1739,6 +1743,7 @@ _key_expansion_192a:
|
||||
movaps %xmm1, 0x10(TKEYP)
|
||||
add $0x20, TKEYP
|
||||
ret
|
||||
ENDPROC(_key_expansion_192a)
|
||||
|
||||
.align 4
|
||||
_key_expansion_192b:
|
||||
@@ -1758,6 +1763,7 @@ _key_expansion_192b:
|
||||
movaps %xmm0, (TKEYP)
|
||||
add $0x10, TKEYP
|
||||
ret
|
||||
ENDPROC(_key_expansion_192b)
|
||||
|
||||
.align 4
|
||||
_key_expansion_256b:
|
||||
@@ -1770,6 +1776,7 @@ _key_expansion_256b:
|
||||
movaps %xmm2, (TKEYP)
|
||||
add $0x10, TKEYP
|
||||
ret
|
||||
ENDPROC(_key_expansion_256b)
|
||||
|
||||
/*
|
||||
* int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
|
||||
@@ -1882,6 +1889,7 @@ ENTRY(aesni_set_key)
|
||||
popl KEYP
|
||||
#endif
|
||||
ret
|
||||
ENDPROC(aesni_set_key)
|
||||
|
||||
/*
|
||||
* void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
|
||||
@@ -1903,6 +1911,7 @@ ENTRY(aesni_enc)
|
||||
popl KEYP
|
||||
#endif
|
||||
ret
|
||||
ENDPROC(aesni_enc)
|
||||
|
||||
/*
|
||||
* _aesni_enc1: internal ABI
|
||||
@@ -1960,6 +1969,7 @@ _aesni_enc1:
|
||||
movaps 0x70(TKEYP), KEY
|
||||
AESENCLAST KEY STATE
|
||||
ret
|
||||
ENDPROC(_aesni_enc1)
|
||||
|
||||
/*
|
||||
* _aesni_enc4: internal ABI
|
||||
@@ -2068,6 +2078,7 @@ _aesni_enc4:
|
||||
AESENCLAST KEY STATE3
|
||||
AESENCLAST KEY STATE4
|
||||
ret
|
||||
ENDPROC(_aesni_enc4)
|
||||
|
||||
/*
|
||||
* void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
|
||||
@@ -2090,6 +2101,7 @@ ENTRY(aesni_dec)
|
||||
popl KEYP
|
||||
#endif
|
||||
ret
|
||||
ENDPROC(aesni_dec)
|
||||
|
||||
/*
|
||||
* _aesni_dec1: internal ABI
|
||||
@@ -2147,6 +2159,7 @@ _aesni_dec1:
|
||||
movaps 0x70(TKEYP), KEY
|
||||
AESDECLAST KEY STATE
|
||||
ret
|
||||
ENDPROC(_aesni_dec1)
|
||||
|
||||
/*
|
||||
* _aesni_dec4: internal ABI
|
||||
@@ -2255,6 +2268,7 @@ _aesni_dec4:
|
||||
AESDECLAST KEY STATE3
|
||||
AESDECLAST KEY STATE4
|
||||
ret
|
||||
ENDPROC(_aesni_dec4)
|
||||
|
||||
/*
|
||||
* void aesni_ecb_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
|
||||
@@ -2312,6 +2326,7 @@ ENTRY(aesni_ecb_enc)
|
||||
popl LEN
|
||||
#endif
|
||||
ret
|
||||
ENDPROC(aesni_ecb_enc)
|
||||
|
||||
/*
|
||||
* void aesni_ecb_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
|
||||
@@ -2370,6 +2385,7 @@ ENTRY(aesni_ecb_dec)
|
||||
popl LEN
|
||||
#endif
|
||||
ret
|
||||
ENDPROC(aesni_ecb_dec)
|
||||
|
||||
/*
|
||||
* void aesni_cbc_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
|
||||
@@ -2411,6 +2427,7 @@ ENTRY(aesni_cbc_enc)
|
||||
popl IVP
|
||||
#endif
|
||||
ret
|
||||
ENDPROC(aesni_cbc_enc)
|
||||
|
||||
/*
|
||||
* void aesni_cbc_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
|
||||
@@ -2501,6 +2518,7 @@ ENTRY(aesni_cbc_dec)
|
||||
popl IVP
|
||||
#endif
|
||||
ret
|
||||
ENDPROC(aesni_cbc_dec)
|
||||
|
||||
#ifdef __x86_64__
|
||||
.align 16
|
||||
@@ -2527,6 +2545,7 @@ _aesni_inc_init:
|
||||
MOVQ_R64_XMM TCTR_LOW INC
|
||||
MOVQ_R64_XMM CTR TCTR_LOW
|
||||
ret
|
||||
ENDPROC(_aesni_inc_init)
|
||||
|
||||
/*
|
||||
* _aesni_inc: internal ABI
|
||||
@@ -2555,6 +2574,7 @@ _aesni_inc:
|
||||
movaps CTR, IV
|
||||
PSHUFB_XMM BSWAP_MASK IV
|
||||
ret
|
||||
ENDPROC(_aesni_inc)
|
||||
|
||||
/*
|
||||
* void aesni_ctr_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
|
||||
@@ -2615,4 +2635,5 @@ ENTRY(aesni_ctr_enc)
|
||||
movups IV, (IVP)
|
||||
.Lctr_enc_just_ret:
|
||||
ret
|
||||
ENDPROC(aesni_ctr_enc)
|
||||
#endif
|
||||
|
||||
@@ -20,6 +20,8 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
|
||||
.file "blowfish-x86_64-asm.S"
|
||||
.text
|
||||
|
||||
@@ -116,11 +118,7 @@
|
||||
bswapq RX0; \
|
||||
xorq RX0, (RIO);
|
||||
|
||||
.align 8
|
||||
.global __blowfish_enc_blk
|
||||
.type __blowfish_enc_blk,@function;
|
||||
|
||||
__blowfish_enc_blk:
|
||||
ENTRY(__blowfish_enc_blk)
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
* %rsi: dst
|
||||
@@ -148,19 +146,16 @@ __blowfish_enc_blk:
|
||||
|
||||
movq %r10, RIO;
|
||||
test %cl, %cl;
|
||||
jnz __enc_xor;
|
||||
jnz .L__enc_xor;
|
||||
|
||||
write_block();
|
||||
ret;
|
||||
__enc_xor:
|
||||
.L__enc_xor:
|
||||
xor_block();
|
||||
ret;
|
||||
ENDPROC(__blowfish_enc_blk)
|
||||
|
||||
.align 8
|
||||
.global blowfish_dec_blk
|
||||
.type blowfish_dec_blk,@function;
|
||||
|
||||
blowfish_dec_blk:
|
||||
ENTRY(blowfish_dec_blk)
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
* %rsi: dst
|
||||
@@ -189,6 +184,7 @@ blowfish_dec_blk:
|
||||
movq %r11, %rbp;
|
||||
|
||||
ret;
|
||||
ENDPROC(blowfish_dec_blk)
|
||||
|
||||
/**********************************************************************
|
||||
4-way blowfish, four blocks parallel
|
||||
@@ -300,11 +296,7 @@ blowfish_dec_blk:
|
||||
bswapq RX3; \
|
||||
xorq RX3, 24(RIO);
|
||||
|
||||
.align 8
|
||||
.global __blowfish_enc_blk_4way
|
||||
.type __blowfish_enc_blk_4way,@function;
|
||||
|
||||
__blowfish_enc_blk_4way:
|
||||
ENTRY(__blowfish_enc_blk_4way)
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
* %rsi: dst
|
||||
@@ -336,7 +328,7 @@ __blowfish_enc_blk_4way:
|
||||
movq %r11, RIO;
|
||||
|
||||
test %bpl, %bpl;
|
||||
jnz __enc_xor4;
|
||||
jnz .L__enc_xor4;
|
||||
|
||||
write_block4();
|
||||
|
||||
@@ -344,18 +336,15 @@ __blowfish_enc_blk_4way:
|
||||
popq %rbp;
|
||||
ret;
|
||||
|
||||
__enc_xor4:
|
||||
.L__enc_xor4:
|
||||
xor_block4();
|
||||
|
||||
popq %rbx;
|
||||
popq %rbp;
|
||||
ret;
|
||||
ENDPROC(__blowfish_enc_blk_4way)
|
||||
|
||||
.align 8
|
||||
.global blowfish_dec_blk_4way
|
||||
.type blowfish_dec_blk_4way,@function;
|
||||
|
||||
blowfish_dec_blk_4way:
|
||||
ENTRY(blowfish_dec_blk_4way)
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
* %rsi: dst
|
||||
@@ -387,4 +376,4 @@ blowfish_dec_blk_4way:
|
||||
popq %rbp;
|
||||
|
||||
ret;
|
||||
|
||||
ENDPROC(blowfish_dec_blk_4way)
|
||||
|
||||
@@ -15,6 +15,8 @@
|
||||
* http://koti.mbnet.fi/axh/crypto/camellia-BSD-1.2.0-aesni1.tar.xz
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
|
||||
#define CAMELLIA_TABLE_BYTE_LEN 272
|
||||
|
||||
/* struct camellia_ctx: */
|
||||
@@ -190,6 +192,7 @@ roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd:
|
||||
%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15,
|
||||
%rcx, (%r9));
|
||||
ret;
|
||||
ENDPROC(roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd)
|
||||
|
||||
.align 8
|
||||
roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab:
|
||||
@@ -197,6 +200,7 @@ roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab:
|
||||
%xmm12, %xmm13, %xmm14, %xmm15, %xmm8, %xmm9, %xmm10, %xmm11,
|
||||
%rax, (%r9));
|
||||
ret;
|
||||
ENDPROC(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
|
||||
|
||||
/*
|
||||
* IN/OUT:
|
||||
@@ -709,8 +713,6 @@ roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab:
|
||||
.text
|
||||
|
||||
.align 8
|
||||
.type __camellia_enc_blk16,@function;
|
||||
|
||||
__camellia_enc_blk16:
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
@@ -793,10 +795,9 @@ __camellia_enc_blk16:
|
||||
%xmm15, %rax, %rcx, 24);
|
||||
|
||||
jmp .Lenc_done;
|
||||
ENDPROC(__camellia_enc_blk16)
|
||||
|
||||
.align 8
|
||||
.type __camellia_dec_blk16,@function;
|
||||
|
||||
__camellia_dec_blk16:
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
@@ -877,12 +878,9 @@ __camellia_dec_blk16:
|
||||
((key_table + (24) * 8) + 4)(CTX));
|
||||
|
||||
jmp .Ldec_max24;
|
||||
ENDPROC(__camellia_dec_blk16)
|
||||
|
||||
.align 8
|
||||
.global camellia_ecb_enc_16way
|
||||
.type camellia_ecb_enc_16way,@function;
|
||||
|
||||
camellia_ecb_enc_16way:
|
||||
ENTRY(camellia_ecb_enc_16way)
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
* %rsi: dst (16 blocks)
|
||||
@@ -903,12 +901,9 @@ camellia_ecb_enc_16way:
|
||||
%xmm8, %rsi);
|
||||
|
||||
ret;
|
||||
ENDPROC(camellia_ecb_enc_16way)
|
||||
|
||||
.align 8
|
||||
.global camellia_ecb_dec_16way
|
||||
.type camellia_ecb_dec_16way,@function;
|
||||
|
||||
camellia_ecb_dec_16way:
|
||||
ENTRY(camellia_ecb_dec_16way)
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
* %rsi: dst (16 blocks)
|
||||
@@ -934,12 +929,9 @@ camellia_ecb_dec_16way:
|
||||
%xmm8, %rsi);
|
||||
|
||||
ret;
|
||||
ENDPROC(camellia_ecb_dec_16way)
|
||||
|
||||
.align 8
|
||||
.global camellia_cbc_dec_16way
|
||||
.type camellia_cbc_dec_16way,@function;
|
||||
|
||||
camellia_cbc_dec_16way:
|
||||
ENTRY(camellia_cbc_dec_16way)
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
* %rsi: dst (16 blocks)
|
||||
@@ -986,6 +978,7 @@ camellia_cbc_dec_16way:
|
||||
%xmm8, %rsi);
|
||||
|
||||
ret;
|
||||
ENDPROC(camellia_cbc_dec_16way)
|
||||
|
||||
#define inc_le128(x, minus_one, tmp) \
|
||||
vpcmpeqq minus_one, x, tmp; \
|
||||
@@ -993,11 +986,7 @@ camellia_cbc_dec_16way:
|
||||
vpslldq $8, tmp, tmp; \
|
||||
vpsubq tmp, x, x;
|
||||
|
||||
.align 8
|
||||
.global camellia_ctr_16way
|
||||
.type camellia_ctr_16way,@function;
|
||||
|
||||
camellia_ctr_16way:
|
||||
ENTRY(camellia_ctr_16way)
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
* %rsi: dst (16 blocks)
|
||||
@@ -1100,3 +1089,4 @@ camellia_ctr_16way:
|
||||
%xmm8, %rsi);
|
||||
|
||||
ret;
|
||||
ENDPROC(camellia_ctr_16way)
|
||||
|
||||
@@ -20,6 +20,8 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
|
||||
.file "camellia-x86_64-asm_64.S"
|
||||
.text
|
||||
|
||||
@@ -188,10 +190,7 @@
|
||||
bswapq RAB0; \
|
||||
movq RAB0, 4*2(RIO);
|
||||
|
||||
.global __camellia_enc_blk;
|
||||
.type __camellia_enc_blk,@function;
|
||||
|
||||
__camellia_enc_blk:
|
||||
ENTRY(__camellia_enc_blk)
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
* %rsi: dst
|
||||
@@ -214,33 +213,31 @@ __camellia_enc_blk:
|
||||
movl $24, RT1d; /* max */
|
||||
|
||||
cmpb $16, key_length(CTX);
|
||||
je __enc_done;
|
||||
je .L__enc_done;
|
||||
|
||||
enc_fls(24);
|
||||
enc_rounds(24);
|
||||
movl $32, RT1d; /* max */
|
||||
|
||||
__enc_done:
|
||||
.L__enc_done:
|
||||
testb RXORbl, RXORbl;
|
||||
movq RDST, RIO;
|
||||
|
||||
jnz __enc_xor;
|
||||
jnz .L__enc_xor;
|
||||
|
||||
enc_outunpack(mov, RT1);
|
||||
|
||||
movq RRBP, %rbp;
|
||||
ret;
|
||||
|
||||
__enc_xor:
|
||||
.L__enc_xor:
|
||||
enc_outunpack(xor, RT1);
|
||||
|
||||
movq RRBP, %rbp;
|
||||
ret;
|
||||
ENDPROC(__camellia_enc_blk)
|
||||
|
||||
.global camellia_dec_blk;
|
||||
.type camellia_dec_blk,@function;
|
||||
|
||||
camellia_dec_blk:
|
||||
ENTRY(camellia_dec_blk)
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
* %rsi: dst
|
||||
@@ -258,12 +255,12 @@ camellia_dec_blk:
|
||||
dec_inpack(RT2);
|
||||
|
||||
cmpb $24, RT2bl;
|
||||
je __dec_rounds16;
|
||||
je .L__dec_rounds16;
|
||||
|
||||
dec_rounds(24);
|
||||
dec_fls(24);
|
||||
|
||||
__dec_rounds16:
|
||||
.L__dec_rounds16:
|
||||
dec_rounds(16);
|
||||
dec_fls(16);
|
||||
dec_rounds(8);
|
||||
@@ -276,6 +273,7 @@ __dec_rounds16:
|
||||
|
||||
movq RRBP, %rbp;
|
||||
ret;
|
||||
ENDPROC(camellia_dec_blk)
|
||||
|
||||
/**********************************************************************
|
||||
2-way camellia
|
||||
@@ -426,10 +424,7 @@ __dec_rounds16:
|
||||
bswapq RAB1; \
|
||||
movq RAB1, 12*2(RIO);
|
||||
|
||||
.global __camellia_enc_blk_2way;
|
||||
.type __camellia_enc_blk_2way,@function;
|
||||
|
||||
__camellia_enc_blk_2way:
|
||||
ENTRY(__camellia_enc_blk_2way)
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
* %rsi: dst
|
||||
@@ -453,16 +448,16 @@ __camellia_enc_blk_2way:
|
||||
movl $24, RT2d; /* max */
|
||||
|
||||
cmpb $16, key_length(CTX);
|
||||
je __enc2_done;
|
||||
je .L__enc2_done;
|
||||
|
||||
enc_fls2(24);
|
||||
enc_rounds2(24);
|
||||
movl $32, RT2d; /* max */
|
||||
|
||||
__enc2_done:
|
||||
.L__enc2_done:
|
||||
test RXORbl, RXORbl;
|
||||
movq RDST, RIO;
|
||||
jnz __enc2_xor;
|
||||
jnz .L__enc2_xor;
|
||||
|
||||
enc_outunpack2(mov, RT2);
|
||||
|
||||
@@ -470,17 +465,15 @@ __enc2_done:
|
||||
popq %rbx;
|
||||
ret;
|
||||
|
||||
__enc2_xor:
|
||||
.L__enc2_xor:
|
||||
enc_outunpack2(xor, RT2);
|
||||
|
||||
movq RRBP, %rbp;
|
||||
popq %rbx;
|
||||
ret;
|
||||
ENDPROC(__camellia_enc_blk_2way)
|
||||
|
||||
.global camellia_dec_blk_2way;
|
||||
.type camellia_dec_blk_2way,@function;
|
||||
|
||||
camellia_dec_blk_2way:
|
||||
ENTRY(camellia_dec_blk_2way)
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
* %rsi: dst
|
||||
@@ -499,12 +492,12 @@ camellia_dec_blk_2way:
|
||||
dec_inpack2(RT2);
|
||||
|
||||
cmpb $24, RT2bl;
|
||||
je __dec2_rounds16;
|
||||
je .L__dec2_rounds16;
|
||||
|
||||
dec_rounds2(24);
|
||||
dec_fls2(24);
|
||||
|
||||
__dec2_rounds16:
|
||||
.L__dec2_rounds16:
|
||||
dec_rounds2(16);
|
||||
dec_fls2(16);
|
||||
dec_rounds2(8);
|
||||
@@ -518,3 +511,4 @@ __dec2_rounds16:
|
||||
movq RRBP, %rbp;
|
||||
movq RXOR, %rbx;
|
||||
ret;
|
||||
ENDPROC(camellia_dec_blk_2way)
|
||||
|
||||
@@ -23,6 +23,8 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
|
||||
.file "cast5-avx-x86_64-asm_64.S"
|
||||
|
||||
.extern cast_s1
|
||||
@@ -211,8 +213,6 @@
|
||||
.text
|
||||
|
||||
.align 16
|
||||
.type __cast5_enc_blk16,@function;
|
||||
|
||||
__cast5_enc_blk16:
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
@@ -263,14 +263,14 @@ __cast5_enc_blk16:
|
||||
|
||||
movzbl rr(CTX), %eax;
|
||||
testl %eax, %eax;
|
||||
jnz __skip_enc;
|
||||
jnz .L__skip_enc;
|
||||
|
||||
round(RL, RR, 12, 1);
|
||||
round(RR, RL, 13, 2);
|
||||
round(RL, RR, 14, 3);
|
||||
round(RR, RL, 15, 1);
|
||||
|
||||
__skip_enc:
|
||||
.L__skip_enc:
|
||||
popq %rbx;
|
||||
popq %rbp;
|
||||
|
||||
@@ -282,10 +282,9 @@ __skip_enc:
|
||||
outunpack_blocks(RR4, RL4, RTMP, RX, RKM);
|
||||
|
||||
ret;
|
||||
ENDPROC(__cast5_enc_blk16)
|
||||
|
||||
.align 16
|
||||
.type __cast5_dec_blk16,@function;
|
||||
|
||||
__cast5_dec_blk16:
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
@@ -323,14 +322,14 @@ __cast5_dec_blk16:
|
||||
|
||||
movzbl rr(CTX), %eax;
|
||||
testl %eax, %eax;
|
||||
jnz __skip_dec;
|
||||
jnz .L__skip_dec;
|
||||
|
||||
round(RL, RR, 15, 1);
|
||||
round(RR, RL, 14, 3);
|
||||
round(RL, RR, 13, 2);
|
||||
round(RR, RL, 12, 1);
|
||||
|
||||
__dec_tail:
|
||||
.L__dec_tail:
|
||||
round(RL, RR, 11, 3);
|
||||
round(RR, RL, 10, 2);
|
||||
round(RL, RR, 9, 1);
|
||||
@@ -355,15 +354,12 @@ __dec_tail:
|
||||
|
||||
ret;
|
||||
|
||||
__skip_dec:
|
||||
.L__skip_dec:
|
||||
vpsrldq $4, RKR, RKR;
|
||||
jmp __dec_tail;
|
||||
jmp .L__dec_tail;
|
||||
ENDPROC(__cast5_dec_blk16)
|
||||
|
||||
.align 16
|
||||
.global cast5_ecb_enc_16way
|
||||
.type cast5_ecb_enc_16way,@function;
|
||||
|
||||
cast5_ecb_enc_16way:
|
||||
ENTRY(cast5_ecb_enc_16way)
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
* %rsi: dst
|
||||
@@ -393,12 +389,9 @@ cast5_ecb_enc_16way:
|
||||
vmovdqu RL4, (7*4*4)(%r11);
|
||||
|
||||
ret;
|
||||
ENDPROC(cast5_ecb_enc_16way)
|
||||
|
||||
.align 16
|
||||
.global cast5_ecb_dec_16way
|
||||
.type cast5_ecb_dec_16way,@function;
|
||||
|
||||
cast5_ecb_dec_16way:
|
||||
ENTRY(cast5_ecb_dec_16way)
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
* %rsi: dst
|
||||
@@ -428,12 +421,9 @@ cast5_ecb_dec_16way:
|
||||
vmovdqu RL4, (7*4*4)(%r11);
|
||||
|
||||
ret;
|
||||
ENDPROC(cast5_ecb_dec_16way)
|
||||
|
||||
.align 16
|
||||
.global cast5_cbc_dec_16way
|
||||
.type cast5_cbc_dec_16way,@function;
|
||||
|
||||
cast5_cbc_dec_16way:
|
||||
ENTRY(cast5_cbc_dec_16way)
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
* %rsi: dst
|
||||
@@ -480,12 +470,9 @@ cast5_cbc_dec_16way:
|
||||
popq %r12;
|
||||
|
||||
ret;
|
||||
ENDPROC(cast5_cbc_dec_16way)
|
||||
|
||||
.align 16
|
||||
.global cast5_ctr_16way
|
||||
.type cast5_ctr_16way,@function;
|
||||
|
||||
cast5_ctr_16way:
|
||||
ENTRY(cast5_ctr_16way)
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
* %rsi: dst
|
||||
@@ -556,3 +543,4 @@ cast5_ctr_16way:
|
||||
popq %r12;
|
||||
|
||||
ret;
|
||||
ENDPROC(cast5_ctr_16way)
|
||||
|
||||
@@ -23,6 +23,7 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include "glue_helper-asm-avx.S"
|
||||
|
||||
.file "cast6-avx-x86_64-asm_64.S"
|
||||
@@ -250,8 +251,6 @@
|
||||
.text
|
||||
|
||||
.align 8
|
||||
.type __cast6_enc_blk8,@function;
|
||||
|
||||
__cast6_enc_blk8:
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
@@ -295,10 +294,9 @@ __cast6_enc_blk8:
|
||||
outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
|
||||
|
||||
ret;
|
||||
ENDPROC(__cast6_enc_blk8)
|
||||
|
||||
.align 8
|
||||
.type __cast6_dec_blk8,@function;
|
||||
|
||||
__cast6_dec_blk8:
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
@@ -341,12 +339,9 @@ __cast6_dec_blk8:
|
||||
outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
|
||||
|
||||
ret;
|
||||
ENDPROC(__cast6_dec_blk8)
|
||||
|
||||
.align 8
|
||||
.global cast6_ecb_enc_8way
|
||||
.type cast6_ecb_enc_8way,@function;
|
||||
|
||||
cast6_ecb_enc_8way:
|
||||
ENTRY(cast6_ecb_enc_8way)
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
* %rsi: dst
|
||||
@@ -362,12 +357,9 @@ cast6_ecb_enc_8way:
|
||||
store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
|
||||
|
||||
ret;
|
||||
ENDPROC(cast6_ecb_enc_8way)
|
||||
|
||||
.align 8
|
||||
.global cast6_ecb_dec_8way
|
||||
.type cast6_ecb_dec_8way,@function;
|
||||
|
||||
cast6_ecb_dec_8way:
|
||||
ENTRY(cast6_ecb_dec_8way)
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
* %rsi: dst
|
||||
@@ -383,12 +375,9 @@ cast6_ecb_dec_8way:
|
||||
store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
|
||||
|
||||
ret;
|
||||
ENDPROC(cast6_ecb_dec_8way)
|
||||
|
||||
.align 8
|
||||
.global cast6_cbc_dec_8way
|
||||
.type cast6_cbc_dec_8way,@function;
|
||||
|
||||
cast6_cbc_dec_8way:
|
||||
ENTRY(cast6_cbc_dec_8way)
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
* %rsi: dst
|
||||
@@ -409,12 +398,9 @@ cast6_cbc_dec_8way:
|
||||
popq %r12;
|
||||
|
||||
ret;
|
||||
ENDPROC(cast6_cbc_dec_8way)
|
||||
|
||||
.align 8
|
||||
.global cast6_ctr_8way
|
||||
.type cast6_ctr_8way,@function;
|
||||
|
||||
cast6_ctr_8way:
|
||||
ENTRY(cast6_ctr_8way)
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
* %rsi: dst
|
||||
@@ -437,3 +423,4 @@ cast6_ctr_8way:
|
||||
popq %r12;
|
||||
|
||||
ret;
|
||||
ENDPROC(cast6_ctr_8way)
|
||||
|
||||
246
arch/x86/crypto/crc32-pclmul_asm.S
Normal file
246
arch/x86/crypto/crc32-pclmul_asm.S
Normal file
@@ -0,0 +1,246 @@
|
||||
/* GPL HEADER START
|
||||
*
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 only,
|
||||
* as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License version 2 for more details (a copy is included
|
||||
* in the LICENSE file that accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* version 2 along with this program; If not, see http://www.gnu.org/licenses
|
||||
*
|
||||
* Please visit http://www.xyratex.com/contact if you need additional
|
||||
* information or have any questions.
|
||||
*
|
||||
* GPL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright 2012 Xyratex Technology Limited
|
||||
*
|
||||
* Using hardware provided PCLMULQDQ instruction to accelerate the CRC32
|
||||
* calculation.
|
||||
* CRC32 polynomial:0x04c11db7(BE)/0xEDB88320(LE)
|
||||
* PCLMULQDQ is a new instruction in Intel SSE4.2, the reference can be found
|
||||
* at:
|
||||
* http://www.intel.com/products/processor/manuals/
|
||||
* Intel(R) 64 and IA-32 Architectures Software Developer's Manual
|
||||
* Volume 2B: Instruction Set Reference, N-Z
|
||||
*
|
||||
* Authors: Gregory Prestas <Gregory_Prestas@us.xyratex.com>
|
||||
* Alexander Boyko <Alexander_Boyko@xyratex.com>
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/inst.h>
|
||||
|
||||
|
||||
.align 16
|
||||
/*
|
||||
* [x4*128+32 mod P(x) << 32)]' << 1 = 0x154442bd4
|
||||
* #define CONSTANT_R1 0x154442bd4LL
|
||||
*
|
||||
* [(x4*128-32 mod P(x) << 32)]' << 1 = 0x1c6e41596
|
||||
* #define CONSTANT_R2 0x1c6e41596LL
|
||||
*/
|
||||
.Lconstant_R2R1:
|
||||
.octa 0x00000001c6e415960000000154442bd4
|
||||
/*
|
||||
* [(x128+32 mod P(x) << 32)]' << 1 = 0x1751997d0
|
||||
* #define CONSTANT_R3 0x1751997d0LL
|
||||
*
|
||||
* [(x128-32 mod P(x) << 32)]' << 1 = 0x0ccaa009e
|
||||
* #define CONSTANT_R4 0x0ccaa009eLL
|
||||
*/
|
||||
.Lconstant_R4R3:
|
||||
.octa 0x00000000ccaa009e00000001751997d0
|
||||
/*
|
||||
* [(x64 mod P(x) << 32)]' << 1 = 0x163cd6124
|
||||
* #define CONSTANT_R5 0x163cd6124LL
|
||||
*/
|
||||
.Lconstant_R5:
|
||||
.octa 0x00000000000000000000000163cd6124
|
||||
.Lconstant_mask32:
|
||||
.octa 0x000000000000000000000000FFFFFFFF
|
||||
/*
|
||||
* #define CRCPOLY_TRUE_LE_FULL 0x1DB710641LL
|
||||
*
|
||||
* Barrett Reduction constant (u64`) = u` = (x**64 / P(x))` = 0x1F7011641LL
|
||||
* #define CONSTANT_RU 0x1F7011641LL
|
||||
*/
|
||||
.Lconstant_RUpoly:
|
||||
.octa 0x00000001F701164100000001DB710641
|
||||
|
||||
#define CONSTANT %xmm0
|
||||
|
||||
#ifdef __x86_64__
|
||||
#define BUF %rdi
|
||||
#define LEN %rsi
|
||||
#define CRC %edx
|
||||
#else
|
||||
#define BUF %eax
|
||||
#define LEN %edx
|
||||
#define CRC %ecx
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
.text
|
||||
/**
|
||||
* Calculate crc32
|
||||
* BUF - buffer (16 bytes aligned)
|
||||
* LEN - sizeof buffer (16 bytes aligned), LEN should be grater than 63
|
||||
* CRC - initial crc32
|
||||
* return %eax crc32
|
||||
* uint crc32_pclmul_le_16(unsigned char const *buffer,
|
||||
* size_t len, uint crc32)
|
||||
*/
|
||||
/*
 * u32 crc32_pclmul_le_16(unsigned char const *buffer, size_t len, u32 crc32)
 *
 * CRC32 (little-endian, no final inversion) folding loop using PCLMULQDQ.
 * BUF, LEN, CRC and CONSTANT are register #defines made earlier in this
 * file (per 32/64-bit ABI); the .Lconstant_* tables live in .rodata below.
 *
 * Caller guarantees: buffer is 16-byte aligned and len >= 64 (see the
 * PCLMUL_MIN_LEN check in the C glue code).
 *
 * NOTE(review): internal jump labels are .L-prefixed so they stay local to
 * this object file, matching the ENTRY/ENDPROC clean-ups elsewhere in this
 * series; code generation is otherwise unchanged.
 */
.globl crc32_pclmul_le_16
.align 4, 0x90
crc32_pclmul_le_16:/* buffer and buffer size are 16 bytes aligned */
	movdqa	(BUF), %xmm1
	movdqa	0x10(BUF), %xmm2
	movdqa	0x20(BUF), %xmm3
	movdqa	0x30(BUF), %xmm4
	movd	CRC, CONSTANT
	pxor	CONSTANT, %xmm1		/* fold initial crc into first block */
	sub	$0x40, LEN
	add	$0x40, BUF
#ifndef __x86_64__
	/* This is for position independent code(-fPIC) support for 32bit */
	call	.Ldelta
.Ldelta:
	pop	%ecx			/* %ecx = runtime address of .Ldelta */
#endif
	cmp	$0x40, LEN
	jb	.Lless_64

#ifdef __x86_64__
	movdqa	.Lconstant_R2R1(%rip), CONSTANT
#else
	movdqa	.Lconstant_R2R1 - .Ldelta(%ecx), CONSTANT
#endif

.Lloop_64:/* 64 bytes Full cache line folding */
	prefetchnta	0x40(BUF)
	movdqa	%xmm1, %xmm5
	movdqa	%xmm2, %xmm6
	movdqa	%xmm3, %xmm7
#ifdef __x86_64__
	movdqa	%xmm4, %xmm8
#endif
	PCLMULQDQ 0x00, CONSTANT, %xmm1
	PCLMULQDQ 0x00, CONSTANT, %xmm2
	PCLMULQDQ 0x00, CONSTANT, %xmm3
#ifdef __x86_64__
	PCLMULQDQ 0x00, CONSTANT, %xmm4
#endif
	PCLMULQDQ 0x11, CONSTANT, %xmm5
	PCLMULQDQ 0x11, CONSTANT, %xmm6
	PCLMULQDQ 0x11, CONSTANT, %xmm7
#ifdef __x86_64__
	PCLMULQDQ 0x11, CONSTANT, %xmm8
#endif
	pxor	%xmm5, %xmm1
	pxor	%xmm6, %xmm2
	pxor	%xmm7, %xmm3
#ifdef __x86_64__
	pxor	%xmm8, %xmm4
#else
	/* xmm8 unavailable on 32-bit; fold xmm4 with a spare register */
	movdqa	%xmm4, %xmm5
	PCLMULQDQ 0x00, CONSTANT, %xmm4
	PCLMULQDQ 0x11, CONSTANT, %xmm5
	pxor	%xmm5, %xmm4
#endif

	pxor	(BUF), %xmm1
	pxor	0x10(BUF), %xmm2
	pxor	0x20(BUF), %xmm3
	pxor	0x30(BUF), %xmm4

	sub	$0x40, LEN
	add	$0x40, BUF
	cmp	$0x40, LEN
	jge	.Lloop_64
.Lless_64:/* Folding cache line into 128bit */
#ifdef __x86_64__
	movdqa	.Lconstant_R4R3(%rip), CONSTANT
#else
	movdqa	.Lconstant_R4R3 - .Ldelta(%ecx), CONSTANT
#endif
	prefetchnta	(BUF)

	/* xmm1 <- fold(xmm1) ^ xmm2 */
	movdqa	%xmm1, %xmm5
	PCLMULQDQ 0x00, CONSTANT, %xmm1
	PCLMULQDQ 0x11, CONSTANT, %xmm5
	pxor	%xmm5, %xmm1
	pxor	%xmm2, %xmm1

	/* xmm1 <- fold(xmm1) ^ xmm3 */
	movdqa	%xmm1, %xmm5
	PCLMULQDQ 0x00, CONSTANT, %xmm1
	PCLMULQDQ 0x11, CONSTANT, %xmm5
	pxor	%xmm5, %xmm1
	pxor	%xmm3, %xmm1

	/* xmm1 <- fold(xmm1) ^ xmm4 */
	movdqa	%xmm1, %xmm5
	PCLMULQDQ 0x00, CONSTANT, %xmm1
	PCLMULQDQ 0x11, CONSTANT, %xmm5
	pxor	%xmm5, %xmm1
	pxor	%xmm4, %xmm1

	cmp	$0x10, LEN
	jb	.Lfold_64
.Lloop_16:/* Folding rest buffer into 128bit */
	movdqa	%xmm1, %xmm5
	PCLMULQDQ 0x00, CONSTANT, %xmm1
	PCLMULQDQ 0x11, CONSTANT, %xmm5
	pxor	%xmm5, %xmm1
	pxor	(BUF), %xmm1
	sub	$0x10, LEN
	add	$0x10, BUF
	cmp	$0x10, LEN
	jge	.Lloop_16

.Lfold_64:
	/* perform the last 64 bit fold, also adds 32 zeroes
	 * to the input stream */
	PCLMULQDQ 0x01, %xmm1, CONSTANT /* R4 * xmm1.low */
	psrldq	$0x08, %xmm1
	pxor	CONSTANT, %xmm1

	/* final 32-bit fold */
	movdqa	%xmm1, %xmm2
#ifdef __x86_64__
	movdqa	.Lconstant_R5(%rip), CONSTANT
	movdqa	.Lconstant_mask32(%rip), %xmm3
#else
	movdqa	.Lconstant_R5 - .Ldelta(%ecx), CONSTANT
	movdqa	.Lconstant_mask32 - .Ldelta(%ecx), %xmm3
#endif
	psrldq	$0x04, %xmm2
	pand	%xmm3, %xmm1
	PCLMULQDQ 0x00, CONSTANT, %xmm1
	pxor	%xmm2, %xmm1

	/* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */
#ifdef __x86_64__
	movdqa	.Lconstant_RUpoly(%rip), CONSTANT
#else
	movdqa	.Lconstant_RUpoly - .Ldelta(%ecx), CONSTANT
#endif
	movdqa	%xmm1, %xmm2
	pand	%xmm3, %xmm1
	PCLMULQDQ 0x10, CONSTANT, %xmm1
	pand	%xmm3, %xmm1
	PCLMULQDQ 0x00, CONSTANT, %xmm1
	pxor	%xmm2, %xmm1
	pextrd	$0x01, %xmm1, %eax	/* return value in %eax */

	ret
|
||||
201
arch/x86/crypto/crc32-pclmul_glue.c
Normal file
201
arch/x86/crypto/crc32-pclmul_glue.c
Normal file
@@ -0,0 +1,201 @@
|
||||
/* GPL HEADER START
|
||||
*
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 only,
|
||||
* as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License version 2 for more details (a copy is included
|
||||
* in the LICENSE file that accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* version 2 along with this program; If not, see http://www.gnu.org/licenses
|
||||
*
|
||||
* Please visit http://www.xyratex.com/contact if you need additional
|
||||
* information or have any questions.
|
||||
*
|
||||
* GPL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright 2012 Xyratex Technology Limited
|
||||
*
|
||||
* Wrappers for kernel crypto shash api to pclmulqdq crc32 imlementation.
|
||||
*/
|
||||
#include <linux/init.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/crc32.h>
|
||||
#include <crypto/internal/hash.h>
|
||||
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/cpu_device_id.h>
|
||||
#include <asm/i387.h>
|
||||
|
||||
#define CHKSUM_BLOCK_SIZE 1
|
||||
#define CHKSUM_DIGEST_SIZE 4
|
||||
|
||||
#define PCLMUL_MIN_LEN 64L /* minimum size of buffer
|
||||
* for crc32_pclmul_le_16 */
|
||||
#define SCALE_F 16L /* size of xmm register */
|
||||
#define SCALE_F_MASK (SCALE_F - 1)
|
||||
|
||||
u32 crc32_pclmul_le_16(unsigned char const *buffer, size_t len, u32 crc32);
|
||||
|
||||
static u32 __attribute__((pure))
|
||||
crc32_pclmul_le(u32 crc, unsigned char const *p, size_t len)
|
||||
{
|
||||
unsigned int iquotient;
|
||||
unsigned int iremainder;
|
||||
unsigned int prealign;
|
||||
|
||||
if (len < PCLMUL_MIN_LEN + SCALE_F_MASK || !irq_fpu_usable())
|
||||
return crc32_le(crc, p, len);
|
||||
|
||||
if ((long)p & SCALE_F_MASK) {
|
||||
/* align p to 16 byte */
|
||||
prealign = SCALE_F - ((long)p & SCALE_F_MASK);
|
||||
|
||||
crc = crc32_le(crc, p, prealign);
|
||||
len -= prealign;
|
||||
p = (unsigned char *)(((unsigned long)p + SCALE_F_MASK) &
|
||||
~SCALE_F_MASK);
|
||||
}
|
||||
iquotient = len & (~SCALE_F_MASK);
|
||||
iremainder = len & SCALE_F_MASK;
|
||||
|
||||
kernel_fpu_begin();
|
||||
crc = crc32_pclmul_le_16(p, iquotient, crc);
|
||||
kernel_fpu_end();
|
||||
|
||||
if (iremainder)
|
||||
crc = crc32_le(crc, p + iquotient, iremainder);
|
||||
|
||||
return crc;
|
||||
}
|
||||
|
||||
static int crc32_pclmul_cra_init(struct crypto_tfm *tfm)
|
||||
{
|
||||
u32 *key = crypto_tfm_ctx(tfm);
|
||||
|
||||
*key = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int crc32_pclmul_setkey(struct crypto_shash *hash, const u8 *key,
|
||||
unsigned int keylen)
|
||||
{
|
||||
u32 *mctx = crypto_shash_ctx(hash);
|
||||
|
||||
if (keylen != sizeof(u32)) {
|
||||
crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
|
||||
return -EINVAL;
|
||||
}
|
||||
*mctx = le32_to_cpup((__le32 *)key);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int crc32_pclmul_init(struct shash_desc *desc)
|
||||
{
|
||||
u32 *mctx = crypto_shash_ctx(desc->tfm);
|
||||
u32 *crcp = shash_desc_ctx(desc);
|
||||
|
||||
*crcp = *mctx;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int crc32_pclmul_update(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int len)
|
||||
{
|
||||
u32 *crcp = shash_desc_ctx(desc);
|
||||
|
||||
*crcp = crc32_pclmul_le(*crcp, data, len);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* No final XOR 0xFFFFFFFF, like crc32_le */
|
||||
static int __crc32_pclmul_finup(u32 *crcp, const u8 *data, unsigned int len,
|
||||
u8 *out)
|
||||
{
|
||||
*(__le32 *)out = cpu_to_le32(crc32_pclmul_le(*crcp, data, len));
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* finup: fold the last chunk into the request state and emit the digest. */
static int crc32_pclmul_finup(struct shash_desc *desc, const u8 *data,
			      unsigned int len, u8 *out)
{
	u32 *state = shash_desc_ctx(desc);

	return __crc32_pclmul_finup(state, data, len, out);
}
|
||||
|
||||
/* Emit the accumulated CRC state as a little-endian 4-byte digest. */
static int crc32_pclmul_final(struct shash_desc *desc, u8 *out)
{
	u32 *state = shash_desc_ctx(desc);

	*(__le32 *)out = cpu_to_le32p(state);
	return 0;
}
|
||||
|
||||
/*
 * One-shot digest: start from the tfm seed (skipping the per-request
 * state entirely) and emit the result.
 */
static int crc32_pclmul_digest(struct shash_desc *desc, const u8 *data,
			       unsigned int len, u8 *out)
{
	u32 *seed = crypto_shash_ctx(desc->tfm);

	return __crc32_pclmul_finup(seed, data, len, out);
}
|
||||
|
||||
static struct shash_alg alg = {
|
||||
.setkey = crc32_pclmul_setkey,
|
||||
.init = crc32_pclmul_init,
|
||||
.update = crc32_pclmul_update,
|
||||
.final = crc32_pclmul_final,
|
||||
.finup = crc32_pclmul_finup,
|
||||
.digest = crc32_pclmul_digest,
|
||||
.descsize = sizeof(u32),
|
||||
.digestsize = CHKSUM_DIGEST_SIZE,
|
||||
.base = {
|
||||
.cra_name = "crc32",
|
||||
.cra_driver_name = "crc32-pclmul",
|
||||
.cra_priority = 200,
|
||||
.cra_blocksize = CHKSUM_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(u32),
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_init = crc32_pclmul_cra_init,
|
||||
}
|
||||
};
|
||||
|
||||
static const struct x86_cpu_id crc32pclmul_cpu_id[] = {
|
||||
X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ),
|
||||
{}
|
||||
};
|
||||
MODULE_DEVICE_TABLE(x86cpu, crc32pclmul_cpu_id);
|
||||
|
||||
|
||||
/*
 * Module load: register the shash only when the CPU actually has
 * PCLMULQDQ; otherwise bail out with -ENODEV.
 */
static int __init crc32_pclmul_mod_init(void)
{
	if (!x86_match_cpu(crc32pclmul_cpu_id)) {
		pr_info("PCLMULQDQ-NI instructions are not detected.\n");
		return -ENODEV;
	}

	return crypto_register_shash(&alg);
}
|
||||
|
||||
/* Module unload: drop the shash registration. */
static void __exit crc32_pclmul_mod_fini(void)
{
	crypto_unregister_shash(&alg);
}
|
||||
|
||||
module_init(crc32_pclmul_mod_init);
|
||||
module_exit(crc32_pclmul_mod_fini);
|
||||
|
||||
MODULE_AUTHOR("Alexander Boyko <alexander_boyko@xyratex.com>");
|
||||
MODULE_LICENSE("GPL");
|
||||
|
||||
MODULE_ALIAS("crc32");
|
||||
MODULE_ALIAS("crc32-pclmul");
|
||||
@@ -42,6 +42,8 @@
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
|
||||
## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction
|
||||
|
||||
.macro LABEL prefix n
|
||||
@@ -68,8 +70,7 @@
|
||||
|
||||
# unsigned int crc_pcl(u8 *buffer, int len, unsigned int crc_init);
|
||||
|
||||
.global crc_pcl
|
||||
crc_pcl:
|
||||
ENTRY(crc_pcl)
|
||||
#define bufp %rdi
|
||||
#define bufp_dw %edi
|
||||
#define bufp_w %di
|
||||
@@ -323,6 +324,9 @@ JMPTBL_ENTRY %i
|
||||
.noaltmacro
|
||||
i=i+1
|
||||
.endr
|
||||
|
||||
ENDPROC(crc_pcl)
|
||||
|
||||
################################################################
|
||||
## PCLMULQDQ tables
|
||||
## Table is 128 entries x 2 quad words each
|
||||
|
||||
@@ -94,6 +94,7 @@ __clmul_gf128mul_ble:
|
||||
pxor T2, T1
|
||||
pxor T1, DATA
|
||||
ret
|
||||
ENDPROC(__clmul_gf128mul_ble)
|
||||
|
||||
/* void clmul_ghash_mul(char *dst, const be128 *shash) */
|
||||
ENTRY(clmul_ghash_mul)
|
||||
@@ -105,6 +106,7 @@ ENTRY(clmul_ghash_mul)
|
||||
PSHUFB_XMM BSWAP DATA
|
||||
movups DATA, (%rdi)
|
||||
ret
|
||||
ENDPROC(clmul_ghash_mul)
|
||||
|
||||
/*
|
||||
* void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
|
||||
@@ -131,6 +133,7 @@ ENTRY(clmul_ghash_update)
|
||||
movups DATA, (%rdi)
|
||||
.Lupdate_just_ret:
|
||||
ret
|
||||
ENDPROC(clmul_ghash_update)
|
||||
|
||||
/*
|
||||
* void clmul_ghash_setkey(be128 *shash, const u8 *key);
|
||||
@@ -155,3 +158,4 @@ ENTRY(clmul_ghash_setkey)
|
||||
pxor %xmm1, %xmm0
|
||||
movups %xmm0, (%rdi)
|
||||
ret
|
||||
ENDPROC(clmul_ghash_setkey)
|
||||
|
||||
@@ -2,11 +2,12 @@
|
||||
# D. J. Bernstein
|
||||
# Public domain.
|
||||
|
||||
# enter ECRYPT_encrypt_bytes
|
||||
#include <linux/linkage.h>
|
||||
|
||||
.text
|
||||
.p2align 5
|
||||
.globl ECRYPT_encrypt_bytes
|
||||
ECRYPT_encrypt_bytes:
|
||||
|
||||
# enter salsa20_encrypt_bytes
|
||||
ENTRY(salsa20_encrypt_bytes)
|
||||
mov %esp,%eax
|
||||
and $31,%eax
|
||||
add $256,%eax
|
||||
@@ -933,11 +934,10 @@ ECRYPT_encrypt_bytes:
|
||||
add $64,%esi
|
||||
# goto bytesatleast1
|
||||
jmp ._bytesatleast1
|
||||
# enter ECRYPT_keysetup
|
||||
.text
|
||||
.p2align 5
|
||||
.globl ECRYPT_keysetup
|
||||
ECRYPT_keysetup:
|
||||
ENDPROC(salsa20_encrypt_bytes)
|
||||
|
||||
# enter salsa20_keysetup
|
||||
ENTRY(salsa20_keysetup)
|
||||
mov %esp,%eax
|
||||
and $31,%eax
|
||||
add $256,%eax
|
||||
@@ -1060,11 +1060,10 @@ ECRYPT_keysetup:
|
||||
# leave
|
||||
add %eax,%esp
|
||||
ret
|
||||
# enter ECRYPT_ivsetup
|
||||
.text
|
||||
.p2align 5
|
||||
.globl ECRYPT_ivsetup
|
||||
ECRYPT_ivsetup:
|
||||
ENDPROC(salsa20_keysetup)
|
||||
|
||||
# enter salsa20_ivsetup
|
||||
ENTRY(salsa20_ivsetup)
|
||||
mov %esp,%eax
|
||||
and $31,%eax
|
||||
add $256,%eax
|
||||
@@ -1112,3 +1111,4 @@ ECRYPT_ivsetup:
|
||||
# leave
|
||||
add %eax,%esp
|
||||
ret
|
||||
ENDPROC(salsa20_ivsetup)
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
# enter ECRYPT_encrypt_bytes
|
||||
.text
|
||||
.p2align 5
|
||||
.globl ECRYPT_encrypt_bytes
|
||||
ECRYPT_encrypt_bytes:
|
||||
#include <linux/linkage.h>
|
||||
|
||||
# enter salsa20_encrypt_bytes
|
||||
ENTRY(salsa20_encrypt_bytes)
|
||||
mov %rsp,%r11
|
||||
and $31,%r11
|
||||
add $256,%r11
|
||||
@@ -802,11 +801,10 @@ ECRYPT_encrypt_bytes:
|
||||
# comment:fp stack unchanged by jump
|
||||
# goto bytesatleast1
|
||||
jmp ._bytesatleast1
|
||||
# enter ECRYPT_keysetup
|
||||
.text
|
||||
.p2align 5
|
||||
.globl ECRYPT_keysetup
|
||||
ECRYPT_keysetup:
|
||||
ENDPROC(salsa20_encrypt_bytes)
|
||||
|
||||
# enter salsa20_keysetup
|
||||
ENTRY(salsa20_keysetup)
|
||||
mov %rsp,%r11
|
||||
and $31,%r11
|
||||
add $256,%r11
|
||||
@@ -892,11 +890,10 @@ ECRYPT_keysetup:
|
||||
mov %rdi,%rax
|
||||
mov %rsi,%rdx
|
||||
ret
|
||||
# enter ECRYPT_ivsetup
|
||||
.text
|
||||
.p2align 5
|
||||
.globl ECRYPT_ivsetup
|
||||
ECRYPT_ivsetup:
|
||||
ENDPROC(salsa20_keysetup)
|
||||
|
||||
# enter salsa20_ivsetup
|
||||
ENTRY(salsa20_ivsetup)
|
||||
mov %rsp,%r11
|
||||
and $31,%r11
|
||||
add $256,%r11
|
||||
@@ -918,3 +915,4 @@ ECRYPT_ivsetup:
|
||||
mov %rdi,%rax
|
||||
mov %rsi,%rdx
|
||||
ret
|
||||
ENDPROC(salsa20_ivsetup)
|
||||
|
||||
@@ -26,11 +26,6 @@
|
||||
#define SALSA20_MIN_KEY_SIZE 16U
|
||||
#define SALSA20_MAX_KEY_SIZE 32U
|
||||
|
||||
// use the ECRYPT_* function names
|
||||
#define salsa20_keysetup ECRYPT_keysetup
|
||||
#define salsa20_ivsetup ECRYPT_ivsetup
|
||||
#define salsa20_encrypt_bytes ECRYPT_encrypt_bytes
|
||||
|
||||
struct salsa20_ctx
|
||||
{
|
||||
u32 input[16];
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include "glue_helper-asm-avx.S"
|
||||
|
||||
.file "serpent-avx-x86_64-asm_64.S"
|
||||
@@ -566,8 +567,6 @@
|
||||
transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
|
||||
|
||||
.align 8
|
||||
.type __serpent_enc_blk8_avx,@function;
|
||||
|
||||
__serpent_enc_blk8_avx:
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
@@ -619,10 +618,9 @@ __serpent_enc_blk8_avx:
|
||||
write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2);
|
||||
|
||||
ret;
|
||||
ENDPROC(__serpent_enc_blk8_avx)
|
||||
|
||||
.align 8
|
||||
.type __serpent_dec_blk8_avx,@function;
|
||||
|
||||
__serpent_dec_blk8_avx:
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
@@ -674,12 +672,9 @@ __serpent_dec_blk8_avx:
|
||||
write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2);
|
||||
|
||||
ret;
|
||||
ENDPROC(__serpent_dec_blk8_avx)
|
||||
|
||||
.align 8
|
||||
.global serpent_ecb_enc_8way_avx
|
||||
.type serpent_ecb_enc_8way_avx,@function;
|
||||
|
||||
serpent_ecb_enc_8way_avx:
|
||||
ENTRY(serpent_ecb_enc_8way_avx)
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
* %rsi: dst
|
||||
@@ -693,12 +688,9 @@ serpent_ecb_enc_8way_avx:
|
||||
store_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
|
||||
|
||||
ret;
|
||||
ENDPROC(serpent_ecb_enc_8way_avx)
|
||||
|
||||
.align 8
|
||||
.global serpent_ecb_dec_8way_avx
|
||||
.type serpent_ecb_dec_8way_avx,@function;
|
||||
|
||||
serpent_ecb_dec_8way_avx:
|
||||
ENTRY(serpent_ecb_dec_8way_avx)
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
* %rsi: dst
|
||||
@@ -712,12 +704,9 @@ serpent_ecb_dec_8way_avx:
|
||||
store_8way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);
|
||||
|
||||
ret;
|
||||
ENDPROC(serpent_ecb_dec_8way_avx)
|
||||
|
||||
.align 8
|
||||
.global serpent_cbc_dec_8way_avx
|
||||
.type serpent_cbc_dec_8way_avx,@function;
|
||||
|
||||
serpent_cbc_dec_8way_avx:
|
||||
ENTRY(serpent_cbc_dec_8way_avx)
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
* %rsi: dst
|
||||
@@ -731,12 +720,9 @@ serpent_cbc_dec_8way_avx:
|
||||
store_cbc_8way(%rdx, %rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);
|
||||
|
||||
ret;
|
||||
ENDPROC(serpent_cbc_dec_8way_avx)
|
||||
|
||||
.align 8
|
||||
.global serpent_ctr_8way_avx
|
||||
.type serpent_ctr_8way_avx,@function;
|
||||
|
||||
serpent_ctr_8way_avx:
|
||||
ENTRY(serpent_ctr_8way_avx)
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
* %rsi: dst
|
||||
@@ -752,3 +738,4 @@ serpent_ctr_8way_avx:
|
||||
store_ctr_8way(%rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
|
||||
|
||||
ret;
|
||||
ENDPROC(serpent_ctr_8way_avx)
|
||||
|
||||
@@ -24,6 +24,8 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
|
||||
.file "serpent-sse2-i586-asm_32.S"
|
||||
.text
|
||||
|
||||
@@ -510,11 +512,7 @@
|
||||
pxor t0, x3; \
|
||||
movdqu x3, (3*4*4)(out);
|
||||
|
||||
.align 8
|
||||
.global __serpent_enc_blk_4way
|
||||
.type __serpent_enc_blk_4way,@function;
|
||||
|
||||
__serpent_enc_blk_4way:
|
||||
ENTRY(__serpent_enc_blk_4way)
|
||||
/* input:
|
||||
* arg_ctx(%esp): ctx, CTX
|
||||
* arg_dst(%esp): dst
|
||||
@@ -566,22 +564,19 @@ __serpent_enc_blk_4way:
|
||||
movl arg_dst(%esp), %eax;
|
||||
|
||||
cmpb $0, arg_xor(%esp);
|
||||
jnz __enc_xor4;
|
||||
jnz .L__enc_xor4;
|
||||
|
||||
write_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE);
|
||||
|
||||
ret;
|
||||
|
||||
__enc_xor4:
|
||||
.L__enc_xor4:
|
||||
xor_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE);
|
||||
|
||||
ret;
|
||||
ENDPROC(__serpent_enc_blk_4way)
|
||||
|
||||
.align 8
|
||||
.global serpent_dec_blk_4way
|
||||
.type serpent_dec_blk_4way,@function;
|
||||
|
||||
serpent_dec_blk_4way:
|
||||
ENTRY(serpent_dec_blk_4way)
|
||||
/* input:
|
||||
* arg_ctx(%esp): ctx, CTX
|
||||
* arg_dst(%esp): dst
|
||||
@@ -633,3 +628,4 @@ serpent_dec_blk_4way:
|
||||
write_blocks(%eax, RC, RD, RB, RE, RT0, RT1, RA);
|
||||
|
||||
ret;
|
||||
ENDPROC(serpent_dec_blk_4way)
|
||||
|
||||
@@ -24,6 +24,8 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
|
||||
.file "serpent-sse2-x86_64-asm_64.S"
|
||||
.text
|
||||
|
||||
@@ -632,11 +634,7 @@
|
||||
pxor t0, x3; \
|
||||
movdqu x3, (3*4*4)(out);
|
||||
|
||||
.align 8
|
||||
.global __serpent_enc_blk_8way
|
||||
.type __serpent_enc_blk_8way,@function;
|
||||
|
||||
__serpent_enc_blk_8way:
|
||||
ENTRY(__serpent_enc_blk_8way)
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
* %rsi: dst
|
||||
@@ -687,24 +685,21 @@ __serpent_enc_blk_8way:
|
||||
leaq (4*4*4)(%rsi), %rax;
|
||||
|
||||
testb %cl, %cl;
|
||||
jnz __enc_xor8;
|
||||
jnz .L__enc_xor8;
|
||||
|
||||
write_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
|
||||
write_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
|
||||
|
||||
ret;
|
||||
|
||||
__enc_xor8:
|
||||
.L__enc_xor8:
|
||||
xor_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
|
||||
xor_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
|
||||
|
||||
ret;
|
||||
ENDPROC(__serpent_enc_blk_8way)
|
||||
|
||||
.align 8
|
||||
.global serpent_dec_blk_8way
|
||||
.type serpent_dec_blk_8way,@function;
|
||||
|
||||
serpent_dec_blk_8way:
|
||||
ENTRY(serpent_dec_blk_8way)
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
* %rsi: dst
|
||||
@@ -756,3 +751,4 @@ serpent_dec_blk_8way:
|
||||
write_blocks(%rax, RC2, RD2, RB2, RE2, RK0, RK1, RK2);
|
||||
|
||||
ret;
|
||||
ENDPROC(serpent_dec_blk_8way)
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user