Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

Pull crypto update from Herbert Xu:
 - Do not idle omap device between crypto operations in one session.
 - Added sha224/sha384 shims for SSSE3.
 - More optimisations for camellia-aesni-avx2.
 - Removed defunct blowfish/twofish AVX2 implementations.
 - Added unaligned buffer self-tests.
 - Added PCLMULQDQ optimisation for CRCT10DIF.
 - Added support for Freescale's DCP co-processor
 - Misc fixes.

* git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (44 commits)
  crypto: testmgr - test hash implementations with unaligned buffers
  crypto: testmgr - test AEADs with unaligned buffers
  crypto: testmgr - test skciphers with unaligned buffers
  crypto: testmgr - check that entries in alg_test_descs are in correct order
  Revert "crypto: twofish - add AVX2/x86_64 assembler implementation of twofish cipher"
  Revert "crypto: blowfish - add AVX2/x86_64 implementation of blowfish cipher"
  crypto: camellia-aesni-avx2 - tune assembly code for more performance
  hwrng: bcm2835 - fix MODULE_LICENSE tag
  hwrng: nomadik - use clk_prepare_enable()
  crypto: picoxcell - replace strict_strtoul() with kstrtoul()
  crypto: dcp - Staticize local symbols
  crypto: dcp - Use NULL instead of 0
  crypto: dcp - Use devm_* APIs
  crypto: dcp - Remove redundant platform_set_drvdata()
  hwrng: use platform_{get,set}_drvdata()
  crypto: omap-aes - Don't idle/start AES device between Encrypt operations
  crypto: crct10dif - Use PTR_RET
  crypto: ux500 - Cocci spatch "resource_size.spatch"
  crypto: sha256_ssse3 - add sha224 support
  crypto: sha512_ssse3 - add sha384 support
  ...
This commit is contained in:
Linus Torvalds
2013-07-05 12:12:33 -07:00
48 changed files with 2541 additions and 2563 deletions

View File

@@ -736,7 +736,7 @@
dcp@80028000 {
reg = <0x80028000 0x2000>;
interrupts = <52 53 54>;
status = "disabled";
compatible = "fsl-dcp";
};
pxp@8002a000 {

View File

@@ -3,8 +3,6 @@
#
avx_supported := $(call as-instr,vpxor %xmm0$(comma)%xmm0$(comma)%xmm0,yes,no)
avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
$(comma)4)$(comma)%ymm2,yes,no)
obj-$(CONFIG_CRYPTO_ABLK_HELPER_X86) += ablk_helper.o
obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o
@@ -29,6 +27,7 @@ obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o
obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o
obj-$(CONFIG_CRYPTO_SHA256_SSSE3) += sha256-ssse3.o
obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o
obj-$(CONFIG_CRYPTO_CRCT10DIF_PCLMUL) += crct10dif-pclmul.o
# These modules require assembler to support AVX.
ifeq ($(avx_supported),yes)
@@ -42,10 +41,8 @@ endif
# These modules require assembler to support AVX2.
ifeq ($(avx2_supported),yes)
obj-$(CONFIG_CRYPTO_BLOWFISH_AVX2_X86_64) += blowfish-avx2.o
obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64) += camellia-aesni-avx2.o
obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o
obj-$(CONFIG_CRYPTO_TWOFISH_AVX2_X86_64) += twofish-avx2.o
endif
aes-i586-y := aes-i586-asm_32.o aes_glue.o
@@ -73,10 +70,8 @@ ifeq ($(avx_supported),yes)
endif
ifeq ($(avx2_supported),yes)
blowfish-avx2-y := blowfish-avx2-asm_64.o blowfish_avx2_glue.o
camellia-aesni-avx2-y := camellia-aesni-avx2-asm_64.o camellia_aesni_avx2_glue.o
serpent-avx2-y := serpent-avx2-asm_64.o serpent_avx2_glue.o
twofish-avx2-y := twofish-avx2-asm_64.o twofish_avx2_glue.o
endif
aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
@@ -87,3 +82,4 @@ crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o
crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o
sha256-ssse3-y := sha256-ssse3-asm.o sha256-avx-asm.o sha256-avx2-asm.o sha256_ssse3_glue.o
sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o
crct10dif-pclmul-y := crct10dif-pcl-asm_64.o crct10dif-pclmul_glue.o

View File

@@ -1,449 +0,0 @@
/*
* x86_64/AVX2 assembler optimized version of Blowfish
*
* Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
*/
#include <linux/linkage.h>
.file "blowfish-avx2-asm_64.S"
.data
.align 32
.Lprefetch_mask:
.long 0*64
.long 1*64
.long 2*64
.long 3*64
.long 4*64
.long 5*64
.long 6*64
.long 7*64
.Lbswap32_mask:
.long 0x00010203
.long 0x04050607
.long 0x08090a0b
.long 0x0c0d0e0f
.Lbswap128_mask:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
.Lbswap_iv_mask:
.byte 7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0
.text
/* structure of crypto context */
#define p 0
#define s0 ((16 + 2) * 4)
#define s1 ((16 + 2 + (1 * 256)) * 4)
#define s2 ((16 + 2 + (2 * 256)) * 4)
#define s3 ((16 + 2 + (3 * 256)) * 4)
/* register macros */
#define CTX %rdi
#define RIO %rdx
#define RS0 %rax
#define RS1 %r8
#define RS2 %r9
#define RS3 %r10
#define RLOOP %r11
#define RLOOPd %r11d
#define RXr0 %ymm8
#define RXr1 %ymm9
#define RXr2 %ymm10
#define RXr3 %ymm11
#define RXl0 %ymm12
#define RXl1 %ymm13
#define RXl2 %ymm14
#define RXl3 %ymm15
/* temp regs */
#define RT0 %ymm0
#define RT0x %xmm0
#define RT1 %ymm1
#define RT1x %xmm1
#define RIDX0 %ymm2
#define RIDX1 %ymm3
#define RIDX1x %xmm3
#define RIDX2 %ymm4
#define RIDX3 %ymm5
/* vpgatherdd mask and '-1' */
#define RNOT %ymm6
/* byte mask, (-1 >> 24) */
#define RBYTE %ymm7
/***********************************************************************
* 32-way AVX2 blowfish
***********************************************************************/
#define F(xl, xr) \
vpsrld $24, xl, RIDX0; \
vpsrld $16, xl, RIDX1; \
vpsrld $8, xl, RIDX2; \
vpand RBYTE, RIDX1, RIDX1; \
vpand RBYTE, RIDX2, RIDX2; \
vpand RBYTE, xl, RIDX3; \
\
vpgatherdd RNOT, (RS0, RIDX0, 4), RT0; \
vpcmpeqd RNOT, RNOT, RNOT; \
vpcmpeqd RIDX0, RIDX0, RIDX0; \
\
vpgatherdd RNOT, (RS1, RIDX1, 4), RT1; \
vpcmpeqd RIDX1, RIDX1, RIDX1; \
vpaddd RT0, RT1, RT0; \
\
vpgatherdd RIDX0, (RS2, RIDX2, 4), RT1; \
vpxor RT0, RT1, RT0; \
\
vpgatherdd RIDX1, (RS3, RIDX3, 4), RT1; \
vpcmpeqd RNOT, RNOT, RNOT; \
vpaddd RT0, RT1, RT0; \
\
vpxor RT0, xr, xr;
#define add_roundkey(xl, nmem) \
vpbroadcastd nmem, RT0; \
vpxor RT0, xl ## 0, xl ## 0; \
vpxor RT0, xl ## 1, xl ## 1; \
vpxor RT0, xl ## 2, xl ## 2; \
vpxor RT0, xl ## 3, xl ## 3;
#define round_enc() \
add_roundkey(RXr, p(CTX,RLOOP,4)); \
F(RXl0, RXr0); \
F(RXl1, RXr1); \
F(RXl2, RXr2); \
F(RXl3, RXr3); \
\
add_roundkey(RXl, p+4(CTX,RLOOP,4)); \
F(RXr0, RXl0); \
F(RXr1, RXl1); \
F(RXr2, RXl2); \
F(RXr3, RXl3);
#define round_dec() \
add_roundkey(RXr, p+4*2(CTX,RLOOP,4)); \
F(RXl0, RXr0); \
F(RXl1, RXr1); \
F(RXl2, RXr2); \
F(RXl3, RXr3); \
\
add_roundkey(RXl, p+4(CTX,RLOOP,4)); \
F(RXr0, RXl0); \
F(RXr1, RXl1); \
F(RXr2, RXl2); \
F(RXr3, RXl3);
#define init_round_constants() \
vpcmpeqd RNOT, RNOT, RNOT; \
leaq s0(CTX), RS0; \
leaq s1(CTX), RS1; \
leaq s2(CTX), RS2; \
leaq s3(CTX), RS3; \
vpsrld $24, RNOT, RBYTE;
#define transpose_2x2(x0, x1, t0) \
vpunpckldq x0, x1, t0; \
vpunpckhdq x0, x1, x1; \
\
vpunpcklqdq t0, x1, x0; \
vpunpckhqdq t0, x1, x1;
#define read_block(xl, xr) \
vbroadcasti128 .Lbswap32_mask, RT1; \
\
vpshufb RT1, xl ## 0, xl ## 0; \
vpshufb RT1, xr ## 0, xr ## 0; \
vpshufb RT1, xl ## 1, xl ## 1; \
vpshufb RT1, xr ## 1, xr ## 1; \
vpshufb RT1, xl ## 2, xl ## 2; \
vpshufb RT1, xr ## 2, xr ## 2; \
vpshufb RT1, xl ## 3, xl ## 3; \
vpshufb RT1, xr ## 3, xr ## 3; \
\
transpose_2x2(xl ## 0, xr ## 0, RT0); \
transpose_2x2(xl ## 1, xr ## 1, RT0); \
transpose_2x2(xl ## 2, xr ## 2, RT0); \
transpose_2x2(xl ## 3, xr ## 3, RT0);
#define write_block(xl, xr) \
vbroadcasti128 .Lbswap32_mask, RT1; \
\
transpose_2x2(xl ## 0, xr ## 0, RT0); \
transpose_2x2(xl ## 1, xr ## 1, RT0); \
transpose_2x2(xl ## 2, xr ## 2, RT0); \
transpose_2x2(xl ## 3, xr ## 3, RT0); \
\
vpshufb RT1, xl ## 0, xl ## 0; \
vpshufb RT1, xr ## 0, xr ## 0; \
vpshufb RT1, xl ## 1, xl ## 1; \
vpshufb RT1, xr ## 1, xr ## 1; \
vpshufb RT1, xl ## 2, xl ## 2; \
vpshufb RT1, xr ## 2, xr ## 2; \
vpshufb RT1, xl ## 3, xl ## 3; \
vpshufb RT1, xr ## 3, xr ## 3;
.align 8
__blowfish_enc_blk32:
/* input:
* %rdi: ctx, CTX
* RXl0..4, RXr0..4: plaintext
* output:
* RXl0..4, RXr0..4: ciphertext (RXl <=> RXr swapped)
*/
init_round_constants();
read_block(RXl, RXr);
movl $1, RLOOPd;
add_roundkey(RXl, p+4*(0)(CTX));
.align 4
.L__enc_loop:
round_enc();
leal 2(RLOOPd), RLOOPd;
cmpl $17, RLOOPd;
jne .L__enc_loop;
add_roundkey(RXr, p+4*(17)(CTX));
write_block(RXl, RXr);
ret;
ENDPROC(__blowfish_enc_blk32)
.align 8
__blowfish_dec_blk32:
/* input:
* %rdi: ctx, CTX
* RXl0..4, RXr0..4: ciphertext
* output:
* RXl0..4, RXr0..4: plaintext (RXl <=> RXr swapped)
*/
init_round_constants();
read_block(RXl, RXr);
movl $14, RLOOPd;
add_roundkey(RXl, p+4*(17)(CTX));
.align 4
.L__dec_loop:
round_dec();
addl $-2, RLOOPd;
jns .L__dec_loop;
add_roundkey(RXr, p+4*(0)(CTX));
write_block(RXl, RXr);
ret;
ENDPROC(__blowfish_dec_blk32)
ENTRY(blowfish_ecb_enc_32way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
*/
vzeroupper;
vmovdqu 0*32(%rdx), RXl0;
vmovdqu 1*32(%rdx), RXr0;
vmovdqu 2*32(%rdx), RXl1;
vmovdqu 3*32(%rdx), RXr1;
vmovdqu 4*32(%rdx), RXl2;
vmovdqu 5*32(%rdx), RXr2;
vmovdqu 6*32(%rdx), RXl3;
vmovdqu 7*32(%rdx), RXr3;
call __blowfish_enc_blk32;
vmovdqu RXr0, 0*32(%rsi);
vmovdqu RXl0, 1*32(%rsi);
vmovdqu RXr1, 2*32(%rsi);
vmovdqu RXl1, 3*32(%rsi);
vmovdqu RXr2, 4*32(%rsi);
vmovdqu RXl2, 5*32(%rsi);
vmovdqu RXr3, 6*32(%rsi);
vmovdqu RXl3, 7*32(%rsi);
vzeroupper;
ret;
ENDPROC(blowfish_ecb_enc_32way)
ENTRY(blowfish_ecb_dec_32way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
*/
vzeroupper;
vmovdqu 0*32(%rdx), RXl0;
vmovdqu 1*32(%rdx), RXr0;
vmovdqu 2*32(%rdx), RXl1;
vmovdqu 3*32(%rdx), RXr1;
vmovdqu 4*32(%rdx), RXl2;
vmovdqu 5*32(%rdx), RXr2;
vmovdqu 6*32(%rdx), RXl3;
vmovdqu 7*32(%rdx), RXr3;
call __blowfish_dec_blk32;
vmovdqu RXr0, 0*32(%rsi);
vmovdqu RXl0, 1*32(%rsi);
vmovdqu RXr1, 2*32(%rsi);
vmovdqu RXl1, 3*32(%rsi);
vmovdqu RXr2, 4*32(%rsi);
vmovdqu RXl2, 5*32(%rsi);
vmovdqu RXr3, 6*32(%rsi);
vmovdqu RXl3, 7*32(%rsi);
vzeroupper;
ret;
ENDPROC(blowfish_ecb_dec_32way)
ENTRY(blowfish_cbc_dec_32way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
*/
vzeroupper;
vmovdqu 0*32(%rdx), RXl0;
vmovdqu 1*32(%rdx), RXr0;
vmovdqu 2*32(%rdx), RXl1;
vmovdqu 3*32(%rdx), RXr1;
vmovdqu 4*32(%rdx), RXl2;
vmovdqu 5*32(%rdx), RXr2;
vmovdqu 6*32(%rdx), RXl3;
vmovdqu 7*32(%rdx), RXr3;
call __blowfish_dec_blk32;
/* xor with src */
vmovq (%rdx), RT0x;
vpshufd $0x4f, RT0x, RT0x;
vinserti128 $1, 8(%rdx), RT0, RT0;
vpxor RT0, RXr0, RXr0;
vpxor 0*32+24(%rdx), RXl0, RXl0;
vpxor 1*32+24(%rdx), RXr1, RXr1;
vpxor 2*32+24(%rdx), RXl1, RXl1;
vpxor 3*32+24(%rdx), RXr2, RXr2;
vpxor 4*32+24(%rdx), RXl2, RXl2;
vpxor 5*32+24(%rdx), RXr3, RXr3;
vpxor 6*32+24(%rdx), RXl3, RXl3;
vmovdqu RXr0, (0*32)(%rsi);
vmovdqu RXl0, (1*32)(%rsi);
vmovdqu RXr1, (2*32)(%rsi);
vmovdqu RXl1, (3*32)(%rsi);
vmovdqu RXr2, (4*32)(%rsi);
vmovdqu RXl2, (5*32)(%rsi);
vmovdqu RXr3, (6*32)(%rsi);
vmovdqu RXl3, (7*32)(%rsi);
vzeroupper;
ret;
ENDPROC(blowfish_cbc_dec_32way)
ENTRY(blowfish_ctr_32way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
* %rcx: iv (big endian, 64bit)
*/
vzeroupper;
vpcmpeqd RT0, RT0, RT0;
vpsrldq $8, RT0, RT0; /* a: -1, b: 0, c: -1, d: 0 */
vpcmpeqd RT1x, RT1x, RT1x;
vpaddq RT1x, RT1x, RT1x; /* a: -2, b: -2 */
vpxor RIDX0, RIDX0, RIDX0;
vinserti128 $1, RT1x, RIDX0, RIDX0; /* a: 0, b: 0, c: -2, d: -2 */
vpaddq RIDX0, RT0, RT0; /* a: -1, b: 0, c: -3, d: -2 */
vpcmpeqd RT1, RT1, RT1;
vpaddq RT1, RT1, RT1; /* a: -2, b: -2, c: -2, d: -2 */
vpaddq RT1, RT1, RIDX2; /* a: -4, b: -4, c: -4, d: -4 */
vbroadcasti128 .Lbswap_iv_mask, RIDX0;
vbroadcasti128 .Lbswap128_mask, RIDX1;
/* load IV and byteswap */
vmovq (%rcx), RT1x;
vinserti128 $1, RT1x, RT1, RT1; /* a: BE, b: 0, c: BE, d: 0 */
vpshufb RIDX0, RT1, RT1; /* a: LE, b: LE, c: LE, d: LE */
/* construct IVs */
vpsubq RT0, RT1, RT1; /* a: le1, b: le0, c: le3, d: le2 */
vpshufb RIDX1, RT1, RXl0; /* a: be0, b: be1, c: be2, d: be3 */
vpsubq RIDX2, RT1, RT1; /* le5, le4, le7, le6 */
vpshufb RIDX1, RT1, RXr0; /* be4, be5, be6, be7 */
vpsubq RIDX2, RT1, RT1;
vpshufb RIDX1, RT1, RXl1;
vpsubq RIDX2, RT1, RT1;
vpshufb RIDX1, RT1, RXr1;
vpsubq RIDX2, RT1, RT1;
vpshufb RIDX1, RT1, RXl2;
vpsubq RIDX2, RT1, RT1;
vpshufb RIDX1, RT1, RXr2;
vpsubq RIDX2, RT1, RT1;
vpshufb RIDX1, RT1, RXl3;
vpsubq RIDX2, RT1, RT1;
vpshufb RIDX1, RT1, RXr3;
/* store last IV */
vpsubq RIDX2, RT1, RT1; /* a: le33, b: le32, ... */
vpshufb RIDX1x, RT1x, RT1x; /* a: be32, ... */
vmovq RT1x, (%rcx);
call __blowfish_enc_blk32;
/* dst = src ^ iv */
vpxor 0*32(%rdx), RXr0, RXr0;
vpxor 1*32(%rdx), RXl0, RXl0;
vpxor 2*32(%rdx), RXr1, RXr1;
vpxor 3*32(%rdx), RXl1, RXl1;
vpxor 4*32(%rdx), RXr2, RXr2;
vpxor 5*32(%rdx), RXl2, RXl2;
vpxor 6*32(%rdx), RXr3, RXr3;
vpxor 7*32(%rdx), RXl3, RXl3;
vmovdqu RXr0, (0*32)(%rsi);
vmovdqu RXl0, (1*32)(%rsi);
vmovdqu RXr1, (2*32)(%rsi);
vmovdqu RXl1, (3*32)(%rsi);
vmovdqu RXr2, (4*32)(%rsi);
vmovdqu RXl2, (5*32)(%rsi);
vmovdqu RXr3, (6*32)(%rsi);
vmovdqu RXl3, (7*32)(%rsi);
vzeroupper;
ret;
ENDPROC(blowfish_ctr_32way)

File diff suppressed because it is too large Load Diff

View File

@@ -1,7 +1,7 @@
/*
* Glue Code for assembler optimized version of Blowfish
*
* Copyright © 2011-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
* Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
*
* CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
* Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
@@ -32,24 +32,40 @@
#include <linux/module.h>
#include <linux/types.h>
#include <crypto/algapi.h>
#include <asm/crypto/blowfish.h>
/* regular block cipher functions */
asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src,
bool xor);
EXPORT_SYMBOL_GPL(__blowfish_enc_blk);
asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src);
EXPORT_SYMBOL_GPL(blowfish_dec_blk);
/* 4-way parallel cipher functions */
asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
const u8 *src, bool xor);
EXPORT_SYMBOL_GPL(__blowfish_enc_blk_4way);
asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst,
const u8 *src);
EXPORT_SYMBOL_GPL(blowfish_dec_blk_4way);
static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src)
{
__blowfish_enc_blk(ctx, dst, src, false);
}
static inline void blowfish_enc_blk_xor(struct bf_ctx *ctx, u8 *dst,
const u8 *src)
{
__blowfish_enc_blk(ctx, dst, src, true);
}
static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
const u8 *src)
{
__blowfish_enc_blk_4way(ctx, dst, src, false);
}
static inline void blowfish_enc_blk_xor_4way(struct bf_ctx *ctx, u8 *dst,
const u8 *src)
{
__blowfish_enc_blk_4way(ctx, dst, src, true);
}
static void blowfish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{

View File

@@ -51,16 +51,6 @@
#define ymm14_x xmm14
#define ymm15_x xmm15
/*
* AES-NI instructions do not support ymmX registers, so we need splitting and
* merging.
*/
#define vaesenclast256(zero, yreg, tmp) \
vextracti128 $1, yreg, tmp##_x; \
vaesenclast zero##_x, yreg##_x, yreg##_x; \
vaesenclast zero##_x, tmp##_x, tmp##_x; \
vinserti128 $1, tmp##_x, yreg, yreg;
/**********************************************************************
32-way camellia
**********************************************************************/
@@ -79,46 +69,70 @@
* S-function with AES subbytes \
*/ \
vbroadcasti128 .Linv_shift_row, t4; \
vpbroadcastb .L0f0f0f0f, t7; \
vbroadcasti128 .Lpre_tf_lo_s1, t0; \
vbroadcasti128 .Lpre_tf_hi_s1, t1; \
vpbroadcastd .L0f0f0f0f, t7; \
vbroadcasti128 .Lpre_tf_lo_s1, t5; \
vbroadcasti128 .Lpre_tf_hi_s1, t6; \
vbroadcasti128 .Lpre_tf_lo_s4, t2; \
vbroadcasti128 .Lpre_tf_hi_s4, t3; \
\
/* AES inverse shift rows */ \
vpshufb t4, x0, x0; \
vpshufb t4, x7, x7; \
vpshufb t4, x1, x1; \
vpshufb t4, x4, x4; \
vpshufb t4, x2, x2; \
vpshufb t4, x5, x5; \
vpshufb t4, x3, x3; \
vpshufb t4, x6, x6; \
vpshufb t4, x2, x2; \
vpshufb t4, x5, x5; \
vpshufb t4, x1, x1; \
vpshufb t4, x4, x4; \
\
/* prefilter sboxes 1, 2 and 3 */ \
vbroadcasti128 .Lpre_tf_lo_s4, t2; \
vbroadcasti128 .Lpre_tf_hi_s4, t3; \
filter_8bit(x0, t0, t1, t7, t6); \
filter_8bit(x7, t0, t1, t7, t6); \
filter_8bit(x1, t0, t1, t7, t6); \
filter_8bit(x4, t0, t1, t7, t6); \
filter_8bit(x2, t0, t1, t7, t6); \
filter_8bit(x5, t0, t1, t7, t6); \
\
/* prefilter sbox 4 */ \
filter_8bit(x0, t5, t6, t7, t4); \
filter_8bit(x7, t5, t6, t7, t4); \
vextracti128 $1, x0, t0##_x; \
vextracti128 $1, x7, t1##_x; \
filter_8bit(x3, t2, t3, t7, t4); \
filter_8bit(x6, t2, t3, t7, t4); \
vextracti128 $1, x3, t3##_x; \
vextracti128 $1, x6, t2##_x; \
filter_8bit(x2, t5, t6, t7, t4); \
filter_8bit(x5, t5, t6, t7, t4); \
filter_8bit(x1, t5, t6, t7, t4); \
filter_8bit(x4, t5, t6, t7, t4); \
\
vpxor t4##_x, t4##_x, t4##_x; \
filter_8bit(x3, t2, t3, t7, t6); \
filter_8bit(x6, t2, t3, t7, t6); \
\
/* AES subbytes + AES shift rows */ \
vextracti128 $1, x2, t6##_x; \
vextracti128 $1, x5, t5##_x; \
vaesenclast t4##_x, x0##_x, x0##_x; \
vaesenclast t4##_x, t0##_x, t0##_x; \
vinserti128 $1, t0##_x, x0, x0; \
vaesenclast t4##_x, x7##_x, x7##_x; \
vaesenclast t4##_x, t1##_x, t1##_x; \
vinserti128 $1, t1##_x, x7, x7; \
vaesenclast t4##_x, x3##_x, x3##_x; \
vaesenclast t4##_x, t3##_x, t3##_x; \
vinserti128 $1, t3##_x, x3, x3; \
vaesenclast t4##_x, x6##_x, x6##_x; \
vaesenclast t4##_x, t2##_x, t2##_x; \
vinserti128 $1, t2##_x, x6, x6; \
vextracti128 $1, x1, t3##_x; \
vextracti128 $1, x4, t2##_x; \
vbroadcasti128 .Lpost_tf_lo_s1, t0; \
vbroadcasti128 .Lpost_tf_hi_s1, t1; \
vaesenclast256(t4, x0, t5); \
vaesenclast256(t4, x7, t5); \
vaesenclast256(t4, x1, t5); \
vaesenclast256(t4, x4, t5); \
vaesenclast256(t4, x2, t5); \
vaesenclast256(t4, x5, t5); \
vaesenclast256(t4, x3, t5); \
vaesenclast256(t4, x6, t5); \
vaesenclast t4##_x, x2##_x, x2##_x; \
vaesenclast t4##_x, t6##_x, t6##_x; \
vinserti128 $1, t6##_x, x2, x2; \
vaesenclast t4##_x, x5##_x, x5##_x; \
vaesenclast t4##_x, t5##_x, t5##_x; \
vinserti128 $1, t5##_x, x5, x5; \
vaesenclast t4##_x, x1##_x, x1##_x; \
vaesenclast t4##_x, t3##_x, t3##_x; \
vinserti128 $1, t3##_x, x1, x1; \
vaesenclast t4##_x, x4##_x, x4##_x; \
vaesenclast t4##_x, t2##_x, t2##_x; \
vinserti128 $1, t2##_x, x4, x4; \
\
/* postfilter sboxes 1 and 4 */ \
vbroadcasti128 .Lpost_tf_lo_s3, t2; \
@@ -139,22 +153,12 @@
/* postfilter sbox 2 */ \
filter_8bit(x1, t4, t5, t7, t2); \
filter_8bit(x4, t4, t5, t7, t2); \
vpxor t7, t7, t7; \
\
vpsrldq $1, t0, t1; \
vpsrldq $2, t0, t2; \
vpshufb t7, t1, t1; \
vpsrldq $3, t0, t3; \
vpsrldq $4, t0, t4; \
vpsrldq $5, t0, t5; \
vpsrldq $6, t0, t6; \
vpsrldq $7, t0, t7; \
vpbroadcastb t0##_x, t0; \
vpbroadcastb t1##_x, t1; \
vpbroadcastb t2##_x, t2; \
vpbroadcastb t3##_x, t3; \
vpbroadcastb t4##_x, t4; \
vpbroadcastb t6##_x, t6; \
vpbroadcastb t5##_x, t5; \
vpbroadcastb t7##_x, t7; \
\
/* P-function */ \
vpxor x5, x0, x0; \
@@ -162,11 +166,21 @@
vpxor x7, x2, x2; \
vpxor x4, x3, x3; \
\
vpshufb t7, t2, t2; \
vpsrldq $4, t0, t4; \
vpshufb t7, t3, t3; \
vpsrldq $5, t0, t5; \
vpshufb t7, t4, t4; \
\
vpxor x2, x4, x4; \
vpxor x3, x5, x5; \
vpxor x0, x6, x6; \
vpxor x1, x7, x7; \
\
vpsrldq $6, t0, t6; \
vpshufb t7, t5, t5; \
vpshufb t7, t6, t6; \
\
vpxor x7, x0, x0; \
vpxor x4, x1, x1; \
vpxor x5, x2, x2; \
@@ -179,12 +193,16 @@
\
/* Add key material and result to CD (x becomes new CD) */ \
\
vpxor t7, x0, x0; \
vpxor 4 * 32(mem_cd), x0, x0; \
\
vpxor t6, x1, x1; \
vpxor 5 * 32(mem_cd), x1, x1; \
\
vpsrldq $7, t0, t6; \
vpshufb t7, t0, t0; \
vpshufb t7, t6, t7; \
\
vpxor t7, x0, x0; \
vpxor 4 * 32(mem_cd), x0, x0; \
\
vpxor t5, x2, x2; \
vpxor 6 * 32(mem_cd), x2, x2; \
\
@@ -204,7 +222,7 @@
vpxor 3 * 32(mem_cd), x7, x7;
/*
* Size optimization... with inlined roundsm16 binary would be over 5 times
* Size optimization... with inlined roundsm32 binary would be over 5 times
* larger and would only marginally faster.
*/
.align 8
@@ -324,13 +342,13 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
*/ \
vpbroadcastd kll, t0; /* only lowest 32-bit used */ \
vpxor tt0, tt0, tt0; \
vpbroadcastb t0##_x, t3; \
vpshufb tt0, t0, t3; \
vpsrldq $1, t0, t0; \
vpbroadcastb t0##_x, t2; \
vpshufb tt0, t0, t2; \
vpsrldq $1, t0, t0; \
vpbroadcastb t0##_x, t1; \
vpshufb tt0, t0, t1; \
vpsrldq $1, t0, t0; \
vpbroadcastb t0##_x, t0; \
vpshufb tt0, t0, t0; \
\
vpand l0, t0, t0; \
vpand l1, t1, t1; \
@@ -340,6 +358,7 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
rol32_1_32(t3, t2, t1, t0, tt1, tt2, tt3, tt0); \
\
vpxor l4, t0, l4; \
vpbroadcastd krr, t0; /* only lowest 32-bit used */ \
vmovdqu l4, 4 * 32(l); \
vpxor l5, t1, l5; \
vmovdqu l5, 5 * 32(l); \
@@ -354,14 +373,13 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
* rl ^= t2; \
*/ \
\
vpbroadcastd krr, t0; /* only lowest 32-bit used */ \
vpbroadcastb t0##_x, t3; \
vpshufb tt0, t0, t3; \
vpsrldq $1, t0, t0; \
vpbroadcastb t0##_x, t2; \
vpshufb tt0, t0, t2; \
vpsrldq $1, t0, t0; \
vpbroadcastb t0##_x, t1; \
vpshufb tt0, t0, t1; \
vpsrldq $1, t0, t0; \
vpbroadcastb t0##_x, t0; \
vpshufb tt0, t0, t0; \
\
vpor 4 * 32(r), t0, t0; \
vpor 5 * 32(r), t1, t1; \
@@ -373,6 +391,7 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
vpxor 2 * 32(r), t2, t2; \
vpxor 3 * 32(r), t3, t3; \
vmovdqu t0, 0 * 32(r); \
vpbroadcastd krl, t0; /* only lowest 32-bit used */ \
vmovdqu t1, 1 * 32(r); \
vmovdqu t2, 2 * 32(r); \
vmovdqu t3, 3 * 32(r); \
@@ -382,14 +401,13 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
* t2 &= rl; \
* rr ^= rol32(t2, 1); \
*/ \
vpbroadcastd krl, t0; /* only lowest 32-bit used */ \
vpbroadcastb t0##_x, t3; \
vpshufb tt0, t0, t3; \
vpsrldq $1, t0, t0; \
vpbroadcastb t0##_x, t2; \
vpshufb tt0, t0, t2; \
vpsrldq $1, t0, t0; \
vpbroadcastb t0##_x, t1; \
vpshufb tt0, t0, t1; \
vpsrldq $1, t0, t0; \
vpbroadcastb t0##_x, t0; \
vpshufb tt0, t0, t0; \
\
vpand 0 * 32(r), t0, t0; \
vpand 1 * 32(r), t1, t1; \
@@ -403,6 +421,7 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
vpxor 6 * 32(r), t2, t2; \
vpxor 7 * 32(r), t3, t3; \
vmovdqu t0, 4 * 32(r); \
vpbroadcastd klr, t0; /* only lowest 32-bit used */ \
vmovdqu t1, 5 * 32(r); \
vmovdqu t2, 6 * 32(r); \
vmovdqu t3, 7 * 32(r); \
@@ -413,14 +432,13 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
* ll ^= t0; \
*/ \
\
vpbroadcastd klr, t0; /* only lowest 32-bit used */ \
vpbroadcastb t0##_x, t3; \
vpshufb tt0, t0, t3; \
vpsrldq $1, t0, t0; \
vpbroadcastb t0##_x, t2; \
vpshufb tt0, t0, t2; \
vpsrldq $1, t0, t0; \
vpbroadcastb t0##_x, t1; \
vpshufb tt0, t0, t1; \
vpsrldq $1, t0, t0; \
vpbroadcastb t0##_x, t0; \
vpshufb tt0, t0, t0; \
\
vpor l4, t0, t0; \
vpor l5, t1, t1; \

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,151 @@
/*
* Cryptographic API.
*
* T10 Data Integrity Field CRC16 Crypto Transform using PCLMULQDQ Instructions
*
* Copyright (C) 2013 Intel Corporation
* Author: Tim Chen <tim.c.chen@linux.intel.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#include <linux/types.h>
#include <linux/module.h>
#include <linux/crc-t10dif.h>
#include <crypto/internal/hash.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/kernel.h>
#include <asm/i387.h>
#include <asm/cpufeature.h>
#include <asm/cpu_device_id.h>
asmlinkage __u16 crc_t10dif_pcl(__u16 crc, const unsigned char *buf,
size_t len);
struct chksum_desc_ctx {
__u16 crc;
};
/*
* Steps through buffer one byte at at time, calculates reflected
* crc using table.
*/
static int chksum_init(struct shash_desc *desc)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
ctx->crc = 0;
return 0;
}
static int chksum_update(struct shash_desc *desc, const u8 *data,
unsigned int length)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
if (irq_fpu_usable()) {
kernel_fpu_begin();
ctx->crc = crc_t10dif_pcl(ctx->crc, data, length);
kernel_fpu_end();
} else
ctx->crc = crc_t10dif_generic(ctx->crc, data, length);
return 0;
}
static int chksum_final(struct shash_desc *desc, u8 *out)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
*(__u16 *)out = ctx->crc;
return 0;
}
static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len,
u8 *out)
{
if (irq_fpu_usable()) {
kernel_fpu_begin();
*(__u16 *)out = crc_t10dif_pcl(*crcp, data, len);
kernel_fpu_end();
} else
*(__u16 *)out = crc_t10dif_generic(*crcp, data, len);
return 0;
}
static int chksum_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
return __chksum_finup(&ctx->crc, data, len, out);
}
static int chksum_digest(struct shash_desc *desc, const u8 *data,
unsigned int length, u8 *out)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
return __chksum_finup(&ctx->crc, data, length, out);
}
static struct shash_alg alg = {
.digestsize = CRC_T10DIF_DIGEST_SIZE,
.init = chksum_init,
.update = chksum_update,
.final = chksum_final,
.finup = chksum_finup,
.digest = chksum_digest,
.descsize = sizeof(struct chksum_desc_ctx),
.base = {
.cra_name = "crct10dif",
.cra_driver_name = "crct10dif-pclmul",
.cra_priority = 200,
.cra_blocksize = CRC_T10DIF_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
};
static const struct x86_cpu_id crct10dif_cpu_id[] = {
X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ),
{}
};
MODULE_DEVICE_TABLE(x86cpu, crct10dif_cpu_id);
static int __init crct10dif_intel_mod_init(void)
{
if (!x86_match_cpu(crct10dif_cpu_id))
return -ENODEV;
return crypto_register_shash(&alg);
}
static void __exit crct10dif_intel_mod_fini(void)
{
crypto_unregister_shash(&alg);
}
module_init(crct10dif_intel_mod_init);
module_exit(crct10dif_intel_mod_fini);
MODULE_AUTHOR("Tim Chen <tim.c.chen@linux.intel.com>");
MODULE_DESCRIPTION("T10 DIF CRC calculation accelerated with PCLMULQDQ.");
MODULE_LICENSE("GPL");
MODULE_ALIAS("crct10dif");
MODULE_ALIAS("crct10dif-pclmul");

View File

@@ -187,7 +187,36 @@ static int sha256_ssse3_import(struct shash_desc *desc, const void *in)
return 0;
}
static struct shash_alg alg = {
static int sha224_ssse3_init(struct shash_desc *desc)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
sctx->state[0] = SHA224_H0;
sctx->state[1] = SHA224_H1;
sctx->state[2] = SHA224_H2;
sctx->state[3] = SHA224_H3;
sctx->state[4] = SHA224_H4;
sctx->state[5] = SHA224_H5;
sctx->state[6] = SHA224_H6;
sctx->state[7] = SHA224_H7;
sctx->count = 0;
return 0;
}
static int sha224_ssse3_final(struct shash_desc *desc, u8 *hash)
{
u8 D[SHA256_DIGEST_SIZE];
sha256_ssse3_final(desc, D);
memcpy(hash, D, SHA224_DIGEST_SIZE);
memset(D, 0, SHA256_DIGEST_SIZE);
return 0;
}
static struct shash_alg algs[] = { {
.digestsize = SHA256_DIGEST_SIZE,
.init = sha256_ssse3_init,
.update = sha256_ssse3_update,
@@ -204,7 +233,24 @@ static struct shash_alg alg = {
.cra_blocksize = SHA256_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
};
}, {
.digestsize = SHA224_DIGEST_SIZE,
.init = sha224_ssse3_init,
.update = sha256_ssse3_update,
.final = sha224_ssse3_final,
.export = sha256_ssse3_export,
.import = sha256_ssse3_import,
.descsize = sizeof(struct sha256_state),
.statesize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha224",
.cra_driver_name = "sha224-ssse3",
.cra_priority = 150,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA224_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
} };
#ifdef CONFIG_AS_AVX
static bool __init avx_usable(void)
@@ -227,7 +273,7 @@ static bool __init avx_usable(void)
static int __init sha256_ssse3_mod_init(void)
{
/* test for SSE3 first */
/* test for SSSE3 first */
if (cpu_has_ssse3)
sha256_transform_asm = sha256_transform_ssse3;
@@ -254,7 +300,7 @@ static int __init sha256_ssse3_mod_init(void)
else
#endif
pr_info("Using SSSE3 optimized SHA-256 implementation\n");
return crypto_register_shash(&alg);
return crypto_register_shashes(algs, ARRAY_SIZE(algs));
}
pr_info("Neither AVX nor SSSE3 is available/usable.\n");
@@ -263,7 +309,7 @@ static int __init sha256_ssse3_mod_init(void)
static void __exit sha256_ssse3_mod_fini(void)
{
crypto_unregister_shash(&alg);
crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
}
module_init(sha256_ssse3_mod_init);
@@ -273,3 +319,4 @@ MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm, Supplemental SSE3 accelerated");
MODULE_ALIAS("sha256");
MODULE_ALIAS("sha384");

View File

@@ -194,7 +194,37 @@ static int sha512_ssse3_import(struct shash_desc *desc, const void *in)
return 0;
}
static struct shash_alg alg = {
static int sha384_ssse3_init(struct shash_desc *desc)
{
struct sha512_state *sctx = shash_desc_ctx(desc);
sctx->state[0] = SHA384_H0;
sctx->state[1] = SHA384_H1;
sctx->state[2] = SHA384_H2;
sctx->state[3] = SHA384_H3;
sctx->state[4] = SHA384_H4;
sctx->state[5] = SHA384_H5;
sctx->state[6] = SHA384_H6;
sctx->state[7] = SHA384_H7;
sctx->count[0] = sctx->count[1] = 0;
return 0;
}
static int sha384_ssse3_final(struct shash_desc *desc, u8 *hash)
{
u8 D[SHA512_DIGEST_SIZE];
sha512_ssse3_final(desc, D);
memcpy(hash, D, SHA384_DIGEST_SIZE);
memset(D, 0, SHA512_DIGEST_SIZE);
return 0;
}
static struct shash_alg algs[] = { {
.digestsize = SHA512_DIGEST_SIZE,
.init = sha512_ssse3_init,
.update = sha512_ssse3_update,
@@ -211,7 +241,24 @@ static struct shash_alg alg = {
.cra_blocksize = SHA512_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
};
}, {
.digestsize = SHA384_DIGEST_SIZE,
.init = sha384_ssse3_init,
.update = sha512_ssse3_update,
.final = sha384_ssse3_final,
.export = sha512_ssse3_export,
.import = sha512_ssse3_import,
.descsize = sizeof(struct sha512_state),
.statesize = sizeof(struct sha512_state),
.base = {
.cra_name = "sha384",
.cra_driver_name = "sha384-ssse3",
.cra_priority = 150,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA384_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
} };
#ifdef CONFIG_AS_AVX
static bool __init avx_usable(void)
@@ -234,7 +281,7 @@ static bool __init avx_usable(void)
static int __init sha512_ssse3_mod_init(void)
{
/* test for SSE3 first */
/* test for SSSE3 first */
if (cpu_has_ssse3)
sha512_transform_asm = sha512_transform_ssse3;
@@ -261,7 +308,7 @@ static int __init sha512_ssse3_mod_init(void)
else
#endif
pr_info("Using SSSE3 optimized SHA-512 implementation\n");
return crypto_register_shash(&alg);
return crypto_register_shashes(algs, ARRAY_SIZE(algs));
}
pr_info("Neither AVX nor SSSE3 is available/usable.\n");
@@ -270,7 +317,7 @@ static int __init sha512_ssse3_mod_init(void)
static void __exit sha512_ssse3_mod_fini(void)
{
crypto_unregister_shash(&alg);
crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
}
module_init(sha512_ssse3_mod_init);
@@ -280,3 +327,4 @@ MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, Supplemental SSE3 accelerated");
MODULE_ALIAS("sha512");
MODULE_ALIAS("sha384");

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -50,26 +50,18 @@
/* 8-way parallel cipher functions */
asmlinkage void twofish_ecb_enc_8way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src);
EXPORT_SYMBOL_GPL(twofish_ecb_enc_8way);
asmlinkage void twofish_ecb_dec_8way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src);
EXPORT_SYMBOL_GPL(twofish_ecb_dec_8way);
asmlinkage void twofish_cbc_dec_8way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src);
EXPORT_SYMBOL_GPL(twofish_cbc_dec_8way);
asmlinkage void twofish_ctr_8way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
EXPORT_SYMBOL_GPL(twofish_ctr_8way);
asmlinkage void twofish_xts_enc_8way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
EXPORT_SYMBOL_GPL(twofish_xts_enc_8way);
asmlinkage void twofish_xts_dec_8way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
EXPORT_SYMBOL_GPL(twofish_xts_dec_8way);
static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src)
@@ -77,19 +69,17 @@ static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
__twofish_enc_blk_3way(ctx, dst, src, false);
}
void twofish_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
static void twofish_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
glue_xts_crypt_128bit_one(ctx, dst, src, iv,
GLUE_FUNC_CAST(twofish_enc_blk));
}
EXPORT_SYMBOL_GPL(twofish_xts_enc);
void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
static void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
glue_xts_crypt_128bit_one(ctx, dst, src, iv,
GLUE_FUNC_CAST(twofish_dec_blk));
}
EXPORT_SYMBOL_GPL(twofish_xts_dec);
static const struct common_glue_ctx twofish_enc = {

View File

@@ -1,43 +0,0 @@
#ifndef ASM_X86_BLOWFISH_H
#define ASM_X86_BLOWFISH_H
#include <linux/crypto.h>
#include <crypto/blowfish.h>
#define BF_PARALLEL_BLOCKS 4
/* regular block cipher functions */
asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src,
bool xor);
asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src);
/* 4-way parallel cipher functions */
asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
const u8 *src, bool xor);
asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst,
const u8 *src);
static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src)
{
__blowfish_enc_blk(ctx, dst, src, false);
}
static inline void blowfish_enc_blk_xor(struct bf_ctx *ctx, u8 *dst,
const u8 *src)
{
__blowfish_enc_blk(ctx, dst, src, true);
}
static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
const u8 *src)
{
__blowfish_enc_blk_4way(ctx, dst, src, false);
}
static inline void blowfish_enc_blk_xor_4way(struct bf_ctx *ctx, u8 *dst,
const u8 *src)
{
__blowfish_enc_blk_4way(ctx, dst, src, true);
}
#endif

View File

@@ -28,20 +28,6 @@ asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src);
/* 8-way parallel cipher functions */
asmlinkage void twofish_ecb_enc_8way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void twofish_ecb_dec_8way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void twofish_cbc_dec_8way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void twofish_ctr_8way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
asmlinkage void twofish_xts_enc_8way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
asmlinkage void twofish_xts_dec_8way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
/* helpers from twofish_x86_64-3way module */
extern void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src);
extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src,
@@ -57,8 +43,4 @@ extern void lrw_twofish_exit_tfm(struct crypto_tfm *tfm);
extern int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
unsigned int keylen);
/* helpers from twofish-avx module */
extern void twofish_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv);
extern void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv);
#endif /* ASM_X86_TWOFISH_H */

View File

@@ -376,6 +376,25 @@ config CRYPTO_CRC32_PCLMUL
which will enable any routine to use the CRC-32-IEEE 802.3 checksum
and gain better performance as compared with the table implementation.
config CRYPTO_CRCT10DIF
tristate "CRCT10DIF algorithm"
select CRYPTO_HASH
help
CRC T10 Data Integrity Field computation is being cast as
a crypto transform. This allows for faster crc t10 diff
transforms to be used if they are available.
config CRYPTO_CRCT10DIF_PCLMUL
tristate "CRCT10DIF PCLMULQDQ hardware acceleration"
depends on X86 && 64BIT && CRC_T10DIF
select CRYPTO_HASH
help
For x86_64 processors with SSE4.2 and PCLMULQDQ supported,
CRC T10 DIF PCLMULQDQ computation can be hardware
accelerated PCLMULQDQ instruction. This option will create
'crct10dif-plcmul' module, which is faster when computing the
crct10dif checksum as compared with the generic table implementation.
config CRYPTO_GHASH
tristate "GHASH digest algorithm"
select CRYPTO_GF128MUL
@@ -820,25 +839,6 @@ config CRYPTO_BLOWFISH_X86_64
See also:
<http://www.schneier.com/blowfish.html>
config CRYPTO_BLOWFISH_AVX2_X86_64
tristate "Blowfish cipher algorithm (x86_64/AVX2)"
depends on X86 && 64BIT
depends on BROKEN
select CRYPTO_ALGAPI
select CRYPTO_CRYPTD
select CRYPTO_ABLK_HELPER_X86
select CRYPTO_BLOWFISH_COMMON
select CRYPTO_BLOWFISH_X86_64
help
Blowfish cipher algorithm (x86_64/AVX2), by Bruce Schneier.
This is a variable key length cipher which can use keys from 32
bits to 448 bits in length. It's fast, simple and specifically
designed for use on "large microprocessors".
See also:
<http://www.schneier.com/blowfish.html>
config CRYPTO_CAMELLIA
tristate "Camellia cipher algorithms"
depends on CRYPTO
@@ -1297,31 +1297,6 @@ config CRYPTO_TWOFISH_AVX_X86_64
See also:
<http://www.schneier.com/twofish.html>
config CRYPTO_TWOFISH_AVX2_X86_64
tristate "Twofish cipher algorithm (x86_64/AVX2)"
depends on X86 && 64BIT
depends on BROKEN
select CRYPTO_ALGAPI
select CRYPTO_CRYPTD
select CRYPTO_ABLK_HELPER_X86
select CRYPTO_GLUE_HELPER_X86
select CRYPTO_TWOFISH_COMMON
select CRYPTO_TWOFISH_X86_64
select CRYPTO_TWOFISH_X86_64_3WAY
select CRYPTO_TWOFISH_AVX_X86_64
select CRYPTO_LRW
select CRYPTO_XTS
help
Twofish cipher algorithm (x86_64/AVX2).
Twofish was submitted as an AES (Advanced Encryption Standard)
candidate cipher by researchers at CounterPane Systems. It is a
16 round block cipher supporting key sizes of 128, 192, and 256
bits.
See also:
<http://www.schneier.com/twofish.html>
comment "Compression"
config CRYPTO_DEFLATE

View File

@@ -83,6 +83,7 @@ obj-$(CONFIG_CRYPTO_ZLIB) += zlib.o
obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o
obj-$(CONFIG_CRYPTO_CRC32C) += crc32c.o
obj-$(CONFIG_CRYPTO_CRC32) += crc32.o
obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif.o
obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o
obj-$(CONFIG_CRYPTO_LZO) += lzo.o
obj-$(CONFIG_CRYPTO_842) += 842.o

178
crypto/crct10dif.c Normal file
View File

@@ -0,0 +1,178 @@
/*
* Cryptographic API.
*
* T10 Data Integrity Field CRC16 Crypto Transform
*
* Copyright (c) 2007 Oracle Corporation. All rights reserved.
* Written by Martin K. Petersen <martin.petersen@oracle.com>
* Copyright (C) 2013 Intel Corporation
* Author: Tim Chen <tim.c.chen@linux.intel.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#include <linux/types.h>
#include <linux/module.h>
#include <linux/crc-t10dif.h>
#include <crypto/internal/hash.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/kernel.h>
struct chksum_desc_ctx {
__u16 crc;
};
/* Table generated using the following polynomium:
* x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1
* gt: 0x8bb7
*/
static const __u16 t10_dif_crc_table[256] = {
0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B,
0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6,
0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6,
0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B,
0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1,
0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C,
0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C,
0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781,
0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8,
0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255,
0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925,
0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698,
0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472,
0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF,
0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF,
0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02,
0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA,
0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067,
0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17,
0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA,
0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640,
0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD,
0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D,
0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30,
0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759,
0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4,
0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394,
0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29,
0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3,
0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E,
0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E,
0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3
};
__u16 crc_t10dif_generic(__u16 crc, const unsigned char *buffer, size_t len)
{
unsigned int i;
for (i = 0 ; i < len ; i++)
crc = (crc << 8) ^ t10_dif_crc_table[((crc >> 8) ^ buffer[i]) & 0xff];
return crc;
}
EXPORT_SYMBOL(crc_t10dif_generic);
/*
* Steps through buffer one byte at at time, calculates reflected
* crc using table.
*/
static int chksum_init(struct shash_desc *desc)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
ctx->crc = 0;
return 0;
}
static int chksum_update(struct shash_desc *desc, const u8 *data,
unsigned int length)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
ctx->crc = crc_t10dif_generic(ctx->crc, data, length);
return 0;
}
static int chksum_final(struct shash_desc *desc, u8 *out)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
*(__u16 *)out = ctx->crc;
return 0;
}
static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len,
u8 *out)
{
*(__u16 *)out = crc_t10dif_generic(*crcp, data, len);
return 0;
}
static int chksum_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
return __chksum_finup(&ctx->crc, data, len, out);
}
static int chksum_digest(struct shash_desc *desc, const u8 *data,
unsigned int length, u8 *out)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
return __chksum_finup(&ctx->crc, data, length, out);
}
static struct shash_alg alg = {
.digestsize = CRC_T10DIF_DIGEST_SIZE,
.init = chksum_init,
.update = chksum_update,
.final = chksum_final,
.finup = chksum_finup,
.digest = chksum_digest,
.descsize = sizeof(struct chksum_desc_ctx),
.base = {
.cra_name = "crct10dif",
.cra_driver_name = "crct10dif-generic",
.cra_priority = 100,
.cra_blocksize = CRC_T10DIF_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
};
static int __init crct10dif_mod_init(void)
{
int ret;
ret = crypto_register_shash(&alg);
return ret;
}
static void __exit crct10dif_mod_fini(void)
{
crypto_unregister_shash(&alg);
}
module_init(crct10dif_mod_init);
module_exit(crct10dif_mod_fini);
MODULE_AUTHOR("Tim Chen <tim.c.chen@linux.intel.com>");
MODULE_DESCRIPTION("T10 DIF CRC calculation.");
MODULE_LICENSE("GPL");

View File

@@ -251,6 +251,7 @@ static struct shash_alg sha512_algs[2] = { {
.descsize = sizeof(struct sha512_state),
.base = {
.cra_name = "sha512",
.cra_driver_name = "sha512-generic",
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA512_BLOCK_SIZE,
.cra_module = THIS_MODULE,
@@ -263,6 +264,7 @@ static struct shash_alg sha512_algs[2] = { {
.descsize = sizeof(struct sha512_state),
.base = {
.cra_name = "sha384",
.cra_driver_name = "sha384-generic",
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA384_BLOCK_SIZE,
.cra_module = THIS_MODULE,

View File

@@ -1174,6 +1174,10 @@ static int do_test(int m)
ret += tcrypt_test("ghash");
break;
case 47:
ret += tcrypt_test("crct10dif");
break;
case 100:
ret += tcrypt_test("hmac(md5)");
break;
@@ -1498,6 +1502,10 @@ static int do_test(int m)
test_hash_speed("crc32c", sec, generic_hash_speed_template);
if (mode > 300 && mode < 400) break;
case 320:
test_hash_speed("crct10dif", sec, generic_hash_speed_template);
if (mode > 300 && mode < 400) break;
case 399:
break;

Some files were not shown because too many files have changed in this diff Show More