mirror of
https://github.com/armbian/linux-cix.git
synced 2026-01-06 12:30:45 -08:00
Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto updates from Herbert Xu: "Algorithms: - Add AES-NI/AVX/x86_64 implementation of SM4. Drivers: - Add Arm SMCCC TRNG based driver" [ And obviously a lot of random fixes and updates - Linus] * 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (84 commits) crypto: sha512 - remove imaginary and mystifying clearing of variables crypto: aesni - xts_crypt() return if walk.nbytes is 0 padata: Remove repeated verbose license text crypto: ccp - Add support for new CCP/PSP device ID crypto: x86/sm4 - add AES-NI/AVX2/x86_64 implementation crypto: x86/sm4 - export reusable AESNI/AVX functions crypto: rmd320 - remove rmd320 in Makefile crypto: skcipher - in_irq() cleanup crypto: hisilicon - check _PS0 and _PR0 method crypto: hisilicon - change parameter passing of debugfs function crypto: hisilicon - support runtime PM for accelerator device crypto: hisilicon - add runtime PM ops crypto: hisilicon - using 'debugfs_create_file' instead of 'debugfs_create_regset32' crypto: tcrypt - add GCM/CCM mode test for SM4 algorithm crypto: testmgr - Add GCM/CCM mode test of SM4 algorithm crypto: tcrypt - Fix missing return value check crypto: hisilicon/sec - modify the hardware endian configuration crypto: hisilicon/sec - fix the abnormal exiting process crypto: qat - store vf.compatible flag crypto: qat - do not export adf_iov_putmsg() ...
This commit is contained in:
@@ -112,7 +112,7 @@ static struct kpp_alg curve25519_alg = {
|
||||
.max_size = curve25519_max_size,
|
||||
};
|
||||
|
||||
static int __init mod_init(void)
|
||||
static int __init arm_curve25519_init(void)
|
||||
{
|
||||
if (elf_hwcap & HWCAP_NEON) {
|
||||
static_branch_enable(&have_neon);
|
||||
@@ -122,14 +122,14 @@ static int __init mod_init(void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __exit mod_exit(void)
|
||||
static void __exit arm_curve25519_exit(void)
|
||||
{
|
||||
if (IS_REACHABLE(CONFIG_CRYPTO_KPP) && elf_hwcap & HWCAP_NEON)
|
||||
crypto_unregister_kpp(&curve25519_alg);
|
||||
}
|
||||
|
||||
module_init(mod_init);
|
||||
module_exit(mod_exit);
|
||||
module_init(arm_curve25519_init);
|
||||
module_exit(arm_curve25519_exit);
|
||||
|
||||
MODULE_ALIAS_CRYPTO("curve25519");
|
||||
MODULE_ALIAS_CRYPTO("curve25519-neon");
|
||||
|
||||
@@ -51,7 +51,7 @@ config CRYPTO_SM4_ARM64_CE
|
||||
tristate "SM4 symmetric cipher (ARMv8.2 Crypto Extensions)"
|
||||
depends on KERNEL_MODE_NEON
|
||||
select CRYPTO_ALGAPI
|
||||
select CRYPTO_SM4
|
||||
select CRYPTO_LIB_SM4
|
||||
|
||||
config CRYPTO_GHASH_ARM64_CE
|
||||
tristate "GHASH/AES-GCM using ARMv8 Crypto Extensions"
|
||||
|
||||
@@ -17,12 +17,20 @@ MODULE_LICENSE("GPL v2");
|
||||
|
||||
asmlinkage void sm4_ce_do_crypt(const u32 *rk, void *out, const void *in);
|
||||
|
||||
/*
 * cia_setkey callback: pass the raw key to sm4_expandkey(), which fills the
 * struct sm4_ctx stored in the transform context.  The status returned by
 * sm4_expandkey() is handed back unchanged.
 */
static int sm4_ce_setkey(struct crypto_tfm *tfm, const u8 *key,
			 unsigned int key_len)
{
	return sm4_expandkey(crypto_tfm_ctx(tfm), key, key_len);
}
|
||||
|
||||
static void sm4_ce_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
|
||||
{
|
||||
const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
|
||||
if (!crypto_simd_usable()) {
|
||||
crypto_sm4_encrypt(tfm, out, in);
|
||||
sm4_crypt_block(ctx->rkey_enc, out, in);
|
||||
} else {
|
||||
kernel_neon_begin();
|
||||
sm4_ce_do_crypt(ctx->rkey_enc, out, in);
|
||||
@@ -32,10 +40,10 @@ static void sm4_ce_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
|
||||
|
||||
static void sm4_ce_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
|
||||
{
|
||||
const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
|
||||
if (!crypto_simd_usable()) {
|
||||
crypto_sm4_decrypt(tfm, out, in);
|
||||
sm4_crypt_block(ctx->rkey_dec, out, in);
|
||||
} else {
|
||||
kernel_neon_begin();
|
||||
sm4_ce_do_crypt(ctx->rkey_dec, out, in);
|
||||
@@ -49,12 +57,12 @@ static struct crypto_alg sm4_ce_alg = {
|
||||
.cra_priority = 200,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
|
||||
.cra_blocksize = SM4_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct crypto_sm4_ctx),
|
||||
.cra_ctxsize = sizeof(struct sm4_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_u.cipher = {
|
||||
.cia_min_keysize = SM4_KEY_SIZE,
|
||||
.cia_max_keysize = SM4_KEY_SIZE,
|
||||
.cia_setkey = crypto_sm4_set_key,
|
||||
.cia_setkey = sm4_ce_setkey,
|
||||
.cia_encrypt = sm4_ce_encrypt,
|
||||
.cia_decrypt = sm4_ce_decrypt
|
||||
}
|
||||
|
||||
@@ -88,6 +88,12 @@ nhpoly1305-avx2-y := nh-avx2-x86_64.o nhpoly1305-avx2-glue.o
|
||||
|
||||
obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o
|
||||
|
||||
obj-$(CONFIG_CRYPTO_SM4_AESNI_AVX_X86_64) += sm4-aesni-avx-x86_64.o
|
||||
sm4-aesni-avx-x86_64-y := sm4-aesni-avx-asm_64.o sm4_aesni_avx_glue.o
|
||||
|
||||
obj-$(CONFIG_CRYPTO_SM4_AESNI_AVX2_X86_64) += sm4-aesni-avx2-x86_64.o
|
||||
sm4-aesni-avx2-x86_64-y := sm4-aesni-avx2-asm_64.o sm4_aesni_avx2_glue.o
|
||||
|
||||
quiet_cmd_perlasm = PERLASM $@
|
||||
cmd_perlasm = $(PERL) $< > $@
|
||||
$(obj)/%.S: $(src)/%.pl FORCE
|
||||
|
||||
@@ -849,6 +849,8 @@ static int xts_crypt(struct skcipher_request *req, bool encrypt)
|
||||
return -EINVAL;
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, false);
|
||||
if (!walk.nbytes)
|
||||
return err;
|
||||
|
||||
if (unlikely(tail > 0 && walk.nbytes < walk.total)) {
|
||||
int blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2;
|
||||
@@ -862,7 +864,10 @@ static int xts_crypt(struct skcipher_request *req, bool encrypt)
|
||||
skcipher_request_set_crypt(&subreq, req->src, req->dst,
|
||||
blocks * AES_BLOCK_SIZE, req->iv);
|
||||
req = &subreq;
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, false);
|
||||
if (err)
|
||||
return err;
|
||||
} else {
|
||||
tail = 0;
|
||||
}
|
||||
|
||||
589
arch/x86/crypto/sm4-aesni-avx-asm_64.S
Normal file
589
arch/x86/crypto/sm4-aesni-avx-asm_64.S
Normal file
File diff suppressed because it is too large
Load Diff
497
arch/x86/crypto/sm4-aesni-avx2-asm_64.S
Normal file
497
arch/x86/crypto/sm4-aesni-avx2-asm_64.S
Normal file
@@ -0,0 +1,497 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
/*
|
||||
* SM4 Cipher Algorithm, AES-NI/AVX2 optimized.
|
||||
* as specified in
|
||||
* https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
|
||||
*
|
||||
* Copyright (C) 2018 Markku-Juhani O. Saarinen <mjos@iki.fi>
|
||||
* Copyright (C) 2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
|
||||
* Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
|
||||
*/
|
||||
|
||||
/* Based on SM4 AES-NI work by libgcrypt and Markku-Juhani O. Saarinen at:
|
||||
* https://github.com/mjosaarinen/sm4ni
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/frame.h>
|
||||
|
||||
#define rRIP (%rip)
|
||||
|
||||
/* vector registers */
|
||||
#define RX0 %ymm0
|
||||
#define RX1 %ymm1
|
||||
#define MASK_4BIT %ymm2
|
||||
#define RTMP0 %ymm3
|
||||
#define RTMP1 %ymm4
|
||||
#define RTMP2 %ymm5
|
||||
#define RTMP3 %ymm6
|
||||
#define RTMP4 %ymm7
|
||||
|
||||
#define RA0 %ymm8
|
||||
#define RA1 %ymm9
|
||||
#define RA2 %ymm10
|
||||
#define RA3 %ymm11
|
||||
|
||||
#define RB0 %ymm12
|
||||
#define RB1 %ymm13
|
||||
#define RB2 %ymm14
|
||||
#define RB3 %ymm15
|
||||
|
||||
#define RNOT %ymm0
|
||||
#define RBSWAP %ymm1
|
||||
|
||||
#define RX0x %xmm0
|
||||
#define RX1x %xmm1
|
||||
#define MASK_4BITx %xmm2
|
||||
|
||||
#define RNOTx %xmm0
|
||||
#define RBSWAPx %xmm1
|
||||
|
||||
#define RTMP0x %xmm3
|
||||
#define RTMP1x %xmm4
|
||||
#define RTMP2x %xmm5
|
||||
#define RTMP3x %xmm6
|
||||
#define RTMP4x %xmm7
|
||||
|
||||
|
||||
/* helper macros */
|
||||
|
||||
/* Transpose four 32-bit words between 128-bit vector lanes. */
|
||||
#define transpose_4x4(x0, x1, x2, x3, t1, t2) \
|
||||
vpunpckhdq x1, x0, t2; \
|
||||
vpunpckldq x1, x0, x0; \
|
||||
\
|
||||
vpunpckldq x3, x2, t1; \
|
||||
vpunpckhdq x3, x2, x2; \
|
||||
\
|
||||
vpunpckhqdq t1, x0, x1; \
|
||||
vpunpcklqdq t1, x0, x0; \
|
||||
\
|
||||
vpunpckhqdq x2, t2, x3; \
|
||||
vpunpcklqdq x2, t2, x2;
|
||||
|
||||
/* pre-SubByte transform. */
|
||||
#define transform_pre(x, lo_t, hi_t, mask4bit, tmp0) \
|
||||
vpand x, mask4bit, tmp0; \
|
||||
vpandn x, mask4bit, x; \
|
||||
vpsrld $4, x, x; \
|
||||
\
|
||||
vpshufb tmp0, lo_t, tmp0; \
|
||||
vpshufb x, hi_t, x; \
|
||||
vpxor tmp0, x, x;
|
||||
|
||||
/* post-SubByte transform. Note: x has been XOR'ed with mask4bit by
|
||||
* 'vaesenclast' instruction. */
|
||||
#define transform_post(x, lo_t, hi_t, mask4bit, tmp0) \
|
||||
vpandn mask4bit, x, tmp0; \
|
||||
vpsrld $4, x, x; \
|
||||
vpand x, mask4bit, x; \
|
||||
\
|
||||
vpshufb tmp0, lo_t, tmp0; \
|
||||
vpshufb x, hi_t, x; \
|
||||
vpxor tmp0, x, x;
|
||||
|
||||
|
||||
/*
 * Constant tables.  The section is mergeable ("M"), so the last operand of
 * .section is the size of ONE entry, not of the whole section.  Every table
 * below is 16 bytes, hence an entry size of 16; the trailing 4-byte mask is
 * padded with 12 more bytes so the section size stays a multiple of the
 * entry size.
 */
.section	.rodata.cst16, "aM", @progbits, 16
.align 16

/*
 * Following four affine transform look-up tables are from work by
 * Markku-Juhani O. Saarinen, at https://github.com/mjosaarinen/sm4ni
 *
 * These allow exposing SM4 S-Box from AES SubByte.
 */

/* pre-SubByte affine transform, from SM4 field to AES field. */
.Lpre_tf_lo_s:
	.quad 0x9197E2E474720701, 0xC7C1B4B222245157
.Lpre_tf_hi_s:
	.quad 0xE240AB09EB49A200, 0xF052B91BF95BB012

/* post-SubByte affine transform, from AES field to SM4 field. */
.Lpost_tf_lo_s:
	.quad 0x5B67F2CEA19D0834, 0xEDD14478172BBE82
.Lpost_tf_hi_s:
	.quad 0xAE7201DD73AFDC00, 0x11CDBE62CC1063BF

/* For isolating SubBytes from AESENCLAST, inverse shift row */
.Linv_shift_row:
	.byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b
	.byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03

/* Inverse shift row + Rotate left by 8 bits on 32-bit words with vpshufb */
.Linv_shift_row_rol_8:
	.byte 0x07, 0x00, 0x0d, 0x0a, 0x0b, 0x04, 0x01, 0x0e
	.byte 0x0f, 0x08, 0x05, 0x02, 0x03, 0x0c, 0x09, 0x06

/* Inverse shift row + Rotate left by 16 bits on 32-bit words with vpshufb */
.Linv_shift_row_rol_16:
	.byte 0x0a, 0x07, 0x00, 0x0d, 0x0e, 0x0b, 0x04, 0x01
	.byte 0x02, 0x0f, 0x08, 0x05, 0x06, 0x03, 0x0c, 0x09

/* Inverse shift row + Rotate left by 24 bits on 32-bit words with vpshufb */
.Linv_shift_row_rol_24:
	.byte 0x0d, 0x0a, 0x07, 0x00, 0x01, 0x0e, 0x0b, 0x04
	.byte 0x05, 0x02, 0x0f, 0x08, 0x09, 0x06, 0x03, 0x0c

/* For CTR-mode IV byteswap */
.Lbswap128_mask:
	.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0

/* For input word byte-swap */
.Lbswap32_mask:
	.byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12

.align 4
/* 4-bit mask */
.L0f0f0f0f:
	.long 0x0f0f0f0f

/* 12 bytes, only for padding the section to a 16-byte multiple */
.Lpadding_deadbeef:
	.long 0xdeadbeef, 0xdeadbeef, 0xdeadbeef
|
||||
|
||||
.text
|
||||
.align 16
|
||||
|
||||
.align 8
|
||||
SYM_FUNC_START_LOCAL(__sm4_crypt_blk16)
|
||||
/* input:
|
||||
* %rdi: round key array, CTX
|
||||
* RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: sixteen parallel
|
||||
* plaintext blocks
|
||||
* output:
|
||||
* RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: sixteen parallel
|
||||
* ciphertext blocks
|
||||
*/
|
||||
FRAME_BEGIN
|
||||
|
||||
vbroadcasti128 .Lbswap32_mask rRIP, RTMP2;
|
||||
vpshufb RTMP2, RA0, RA0;
|
||||
vpshufb RTMP2, RA1, RA1;
|
||||
vpshufb RTMP2, RA2, RA2;
|
||||
vpshufb RTMP2, RA3, RA3;
|
||||
vpshufb RTMP2, RB0, RB0;
|
||||
vpshufb RTMP2, RB1, RB1;
|
||||
vpshufb RTMP2, RB2, RB2;
|
||||
vpshufb RTMP2, RB3, RB3;
|
||||
|
||||
vpbroadcastd .L0f0f0f0f rRIP, MASK_4BIT;
|
||||
transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1);
|
||||
transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1);
|
||||
|
||||
#define ROUND(round, s0, s1, s2, s3, r0, r1, r2, r3) \
|
||||
vpbroadcastd (4*(round))(%rdi), RX0; \
|
||||
vbroadcasti128 .Lpre_tf_lo_s rRIP, RTMP4; \
|
||||
vbroadcasti128 .Lpre_tf_hi_s rRIP, RTMP1; \
|
||||
vmovdqa RX0, RX1; \
|
||||
vpxor s1, RX0, RX0; \
|
||||
vpxor s2, RX0, RX0; \
|
||||
vpxor s3, RX0, RX0; /* s1 ^ s2 ^ s3 ^ rk */ \
|
||||
vbroadcasti128 .Lpost_tf_lo_s rRIP, RTMP2; \
|
||||
vbroadcasti128 .Lpost_tf_hi_s rRIP, RTMP3; \
|
||||
vpxor r1, RX1, RX1; \
|
||||
vpxor r2, RX1, RX1; \
|
||||
vpxor r3, RX1, RX1; /* r1 ^ r2 ^ r3 ^ rk */ \
|
||||
\
|
||||
/* sbox, non-linear part */ \
|
||||
transform_pre(RX0, RTMP4, RTMP1, MASK_4BIT, RTMP0); \
|
||||
transform_pre(RX1, RTMP4, RTMP1, MASK_4BIT, RTMP0); \
|
||||
vextracti128 $1, RX0, RTMP4x; \
|
||||
vextracti128 $1, RX1, RTMP0x; \
|
||||
vaesenclast MASK_4BITx, RX0x, RX0x; \
|
||||
vaesenclast MASK_4BITx, RTMP4x, RTMP4x; \
|
||||
vaesenclast MASK_4BITx, RX1x, RX1x; \
|
||||
vaesenclast MASK_4BITx, RTMP0x, RTMP0x; \
|
||||
vinserti128 $1, RTMP4x, RX0, RX0; \
|
||||
vbroadcasti128 .Linv_shift_row rRIP, RTMP4; \
|
||||
vinserti128 $1, RTMP0x, RX1, RX1; \
|
||||
transform_post(RX0, RTMP2, RTMP3, MASK_4BIT, RTMP0); \
|
||||
transform_post(RX1, RTMP2, RTMP3, MASK_4BIT, RTMP0); \
|
||||
\
|
||||
/* linear part */ \
|
||||
vpshufb RTMP4, RX0, RTMP0; \
|
||||
vpxor RTMP0, s0, s0; /* s0 ^ x */ \
|
||||
vpshufb RTMP4, RX1, RTMP2; \
|
||||
vbroadcasti128 .Linv_shift_row_rol_8 rRIP, RTMP4; \
|
||||
vpxor RTMP2, r0, r0; /* r0 ^ x */ \
|
||||
vpshufb RTMP4, RX0, RTMP1; \
|
||||
vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) */ \
|
||||
vpshufb RTMP4, RX1, RTMP3; \
|
||||
vbroadcasti128 .Linv_shift_row_rol_16 rRIP, RTMP4; \
|
||||
vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) */ \
|
||||
vpshufb RTMP4, RX0, RTMP1; \
|
||||
vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) ^ rol(x,16) */ \
|
||||
vpshufb RTMP4, RX1, RTMP3; \
|
||||
vbroadcasti128 .Linv_shift_row_rol_24 rRIP, RTMP4; \
|
||||
vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) ^ rol(x,16) */ \
|
||||
vpshufb RTMP4, RX0, RTMP1; \
|
||||
vpxor RTMP1, s0, s0; /* s0 ^ x ^ rol(x,24) */ \
|
||||
vpslld $2, RTMP0, RTMP1; \
|
||||
vpsrld $30, RTMP0, RTMP0; \
|
||||
vpxor RTMP0, s0, s0; \
|
||||
/* s0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \
|
||||
vpxor RTMP1, s0, s0; \
|
||||
vpshufb RTMP4, RX1, RTMP3; \
|
||||
vpxor RTMP3, r0, r0; /* r0 ^ x ^ rol(x,24) */ \
|
||||
vpslld $2, RTMP2, RTMP3; \
|
||||
vpsrld $30, RTMP2, RTMP2; \
|
||||
vpxor RTMP2, r0, r0; \
|
||||
/* r0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \
|
||||
vpxor RTMP3, r0, r0;
|
||||
|
||||
leaq (32*4)(%rdi), %rax;
|
||||
.align 16
|
||||
.Lroundloop_blk8:
|
||||
ROUND(0, RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3);
|
||||
ROUND(1, RA1, RA2, RA3, RA0, RB1, RB2, RB3, RB0);
|
||||
ROUND(2, RA2, RA3, RA0, RA1, RB2, RB3, RB0, RB1);
|
||||
ROUND(3, RA3, RA0, RA1, RA2, RB3, RB0, RB1, RB2);
|
||||
leaq (4*4)(%rdi), %rdi;
|
||||
cmpq %rax, %rdi;
|
||||
jne .Lroundloop_blk8;
|
||||
|
||||
#undef ROUND
|
||||
|
||||
vbroadcasti128 .Lbswap128_mask rRIP, RTMP2;
|
||||
|
||||
transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1);
|
||||
transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1);
|
||||
vpshufb RTMP2, RA0, RA0;
|
||||
vpshufb RTMP2, RA1, RA1;
|
||||
vpshufb RTMP2, RA2, RA2;
|
||||
vpshufb RTMP2, RA3, RA3;
|
||||
vpshufb RTMP2, RB0, RB0;
|
||||
vpshufb RTMP2, RB1, RB1;
|
||||
vpshufb RTMP2, RB2, RB2;
|
||||
vpshufb RTMP2, RB3, RB3;
|
||||
|
||||
FRAME_END
|
||||
ret;
|
||||
SYM_FUNC_END(__sm4_crypt_blk16)
|
||||
|
||||
#define inc_le128(x, minus_one, tmp) \
|
||||
vpcmpeqq minus_one, x, tmp; \
|
||||
vpsubq minus_one, x, x; \
|
||||
vpslldq $8, tmp, tmp; \
|
||||
vpsubq tmp, x, x;
|
||||
|
||||
/*
|
||||
* void sm4_aesni_avx2_ctr_enc_blk16(const u32 *rk, u8 *dst,
|
||||
* const u8 *src, u8 *iv)
|
||||
*/
|
||||
.align 8
|
||||
SYM_FUNC_START(sm4_aesni_avx2_ctr_enc_blk16)
|
||||
/* input:
|
||||
* %rdi: round key array, CTX
|
||||
* %rsi: dst (16 blocks)
|
||||
* %rdx: src (16 blocks)
|
||||
* %rcx: iv (big endian, 128bit)
|
||||
*/
|
||||
FRAME_BEGIN
|
||||
|
||||
movq 8(%rcx), %rax;
|
||||
bswapq %rax;
|
||||
|
||||
vzeroupper;
|
||||
|
||||
vbroadcasti128 .Lbswap128_mask rRIP, RTMP3;
|
||||
vpcmpeqd RNOT, RNOT, RNOT;
|
||||
vpsrldq $8, RNOT, RNOT; /* ab: -1:0 ; cd: -1:0 */
|
||||
vpaddq RNOT, RNOT, RTMP2; /* ab: -2:0 ; cd: -2:0 */
|
||||
|
||||
/* load IV and byteswap */
|
||||
vmovdqu (%rcx), RTMP4x;
|
||||
vpshufb RTMP3x, RTMP4x, RTMP4x;
|
||||
vmovdqa RTMP4x, RTMP0x;
|
||||
inc_le128(RTMP4x, RNOTx, RTMP1x);
|
||||
vinserti128 $1, RTMP4x, RTMP0, RTMP0;
|
||||
vpshufb RTMP3, RTMP0, RA0; /* +1 ; +0 */
|
||||
|
||||
/* check need for handling 64-bit overflow and carry */
|
||||
cmpq $(0xffffffffffffffff - 16), %rax;
|
||||
ja .Lhandle_ctr_carry;
|
||||
|
||||
/* construct IVs */
|
||||
vpsubq RTMP2, RTMP0, RTMP0; /* +3 ; +2 */
|
||||
vpshufb RTMP3, RTMP0, RA1;
|
||||
vpsubq RTMP2, RTMP0, RTMP0; /* +5 ; +4 */
|
||||
vpshufb RTMP3, RTMP0, RA2;
|
||||
vpsubq RTMP2, RTMP0, RTMP0; /* +7 ; +6 */
|
||||
vpshufb RTMP3, RTMP0, RA3;
|
||||
vpsubq RTMP2, RTMP0, RTMP0; /* +9 ; +8 */
|
||||
vpshufb RTMP3, RTMP0, RB0;
|
||||
vpsubq RTMP2, RTMP0, RTMP0; /* +11 ; +10 */
|
||||
vpshufb RTMP3, RTMP0, RB1;
|
||||
vpsubq RTMP2, RTMP0, RTMP0; /* +13 ; +12 */
|
||||
vpshufb RTMP3, RTMP0, RB2;
|
||||
vpsubq RTMP2, RTMP0, RTMP0; /* +15 ; +14 */
|
||||
vpshufb RTMP3, RTMP0, RB3;
|
||||
vpsubq RTMP2, RTMP0, RTMP0; /* +16 */
|
||||
vpshufb RTMP3x, RTMP0x, RTMP0x;
|
||||
|
||||
jmp .Lctr_carry_done;
|
||||
|
||||
.Lhandle_ctr_carry:
|
||||
/* construct IVs */
|
||||
inc_le128(RTMP0, RNOT, RTMP1);
|
||||
inc_le128(RTMP0, RNOT, RTMP1);
|
||||
vpshufb RTMP3, RTMP0, RA1; /* +3 ; +2 */
|
||||
inc_le128(RTMP0, RNOT, RTMP1);
|
||||
inc_le128(RTMP0, RNOT, RTMP1);
|
||||
vpshufb RTMP3, RTMP0, RA2; /* +5 ; +4 */
|
||||
inc_le128(RTMP0, RNOT, RTMP1);
|
||||
inc_le128(RTMP0, RNOT, RTMP1);
|
||||
vpshufb RTMP3, RTMP0, RA3; /* +7 ; +6 */
|
||||
inc_le128(RTMP0, RNOT, RTMP1);
|
||||
inc_le128(RTMP0, RNOT, RTMP1);
|
||||
vpshufb RTMP3, RTMP0, RB0; /* +9 ; +8 */
|
||||
inc_le128(RTMP0, RNOT, RTMP1);
|
||||
inc_le128(RTMP0, RNOT, RTMP1);
|
||||
vpshufb RTMP3, RTMP0, RB1; /* +11 ; +10 */
|
||||
inc_le128(RTMP0, RNOT, RTMP1);
|
||||
inc_le128(RTMP0, RNOT, RTMP1);
|
||||
vpshufb RTMP3, RTMP0, RB2; /* +13 ; +12 */
|
||||
inc_le128(RTMP0, RNOT, RTMP1);
|
||||
inc_le128(RTMP0, RNOT, RTMP1);
|
||||
vpshufb RTMP3, RTMP0, RB3; /* +15 ; +14 */
|
||||
inc_le128(RTMP0, RNOT, RTMP1);
|
||||
vextracti128 $1, RTMP0, RTMP0x;
|
||||
vpshufb RTMP3x, RTMP0x, RTMP0x; /* +16 */
|
||||
|
||||
.align 4
|
||||
.Lctr_carry_done:
|
||||
/* store new IV */
|
||||
vmovdqu RTMP0x, (%rcx);
|
||||
|
||||
call __sm4_crypt_blk16;
|
||||
|
||||
vpxor (0 * 32)(%rdx), RA0, RA0;
|
||||
vpxor (1 * 32)(%rdx), RA1, RA1;
|
||||
vpxor (2 * 32)(%rdx), RA2, RA2;
|
||||
vpxor (3 * 32)(%rdx), RA3, RA3;
|
||||
vpxor (4 * 32)(%rdx), RB0, RB0;
|
||||
vpxor (5 * 32)(%rdx), RB1, RB1;
|
||||
vpxor (6 * 32)(%rdx), RB2, RB2;
|
||||
vpxor (7 * 32)(%rdx), RB3, RB3;
|
||||
|
||||
vmovdqu RA0, (0 * 32)(%rsi);
|
||||
vmovdqu RA1, (1 * 32)(%rsi);
|
||||
vmovdqu RA2, (2 * 32)(%rsi);
|
||||
vmovdqu RA3, (3 * 32)(%rsi);
|
||||
vmovdqu RB0, (4 * 32)(%rsi);
|
||||
vmovdqu RB1, (5 * 32)(%rsi);
|
||||
vmovdqu RB2, (6 * 32)(%rsi);
|
||||
vmovdqu RB3, (7 * 32)(%rsi);
|
||||
|
||||
vzeroall;
|
||||
FRAME_END
|
||||
ret;
|
||||
SYM_FUNC_END(sm4_aesni_avx2_ctr_enc_blk16)
|
||||
|
||||
/*
|
||||
* void sm4_aesni_avx2_cbc_dec_blk16(const u32 *rk, u8 *dst,
|
||||
* const u8 *src, u8 *iv)
|
||||
*/
|
||||
.align 8
|
||||
SYM_FUNC_START(sm4_aesni_avx2_cbc_dec_blk16)
|
||||
/* input:
|
||||
* %rdi: round key array, CTX
|
||||
* %rsi: dst (16 blocks)
|
||||
* %rdx: src (16 blocks)
|
||||
* %rcx: iv
|
||||
*/
|
||||
FRAME_BEGIN
|
||||
|
||||
vzeroupper;
|
||||
|
||||
vmovdqu (0 * 32)(%rdx), RA0;
|
||||
vmovdqu (1 * 32)(%rdx), RA1;
|
||||
vmovdqu (2 * 32)(%rdx), RA2;
|
||||
vmovdqu (3 * 32)(%rdx), RA3;
|
||||
vmovdqu (4 * 32)(%rdx), RB0;
|
||||
vmovdqu (5 * 32)(%rdx), RB1;
|
||||
vmovdqu (6 * 32)(%rdx), RB2;
|
||||
vmovdqu (7 * 32)(%rdx), RB3;
|
||||
|
||||
call __sm4_crypt_blk16;
|
||||
|
||||
vmovdqu (%rcx), RNOTx;
|
||||
vinserti128 $1, (%rdx), RNOT, RNOT;
|
||||
vpxor RNOT, RA0, RA0;
|
||||
vpxor (0 * 32 + 16)(%rdx), RA1, RA1;
|
||||
vpxor (1 * 32 + 16)(%rdx), RA2, RA2;
|
||||
vpxor (2 * 32 + 16)(%rdx), RA3, RA3;
|
||||
vpxor (3 * 32 + 16)(%rdx), RB0, RB0;
|
||||
vpxor (4 * 32 + 16)(%rdx), RB1, RB1;
|
||||
vpxor (5 * 32 + 16)(%rdx), RB2, RB2;
|
||||
vpxor (6 * 32 + 16)(%rdx), RB3, RB3;
|
||||
vmovdqu (7 * 32 + 16)(%rdx), RNOTx;
|
||||
vmovdqu RNOTx, (%rcx); /* store new IV */
|
||||
|
||||
vmovdqu RA0, (0 * 32)(%rsi);
|
||||
vmovdqu RA1, (1 * 32)(%rsi);
|
||||
vmovdqu RA2, (2 * 32)(%rsi);
|
||||
vmovdqu RA3, (3 * 32)(%rsi);
|
||||
vmovdqu RB0, (4 * 32)(%rsi);
|
||||
vmovdqu RB1, (5 * 32)(%rsi);
|
||||
vmovdqu RB2, (6 * 32)(%rsi);
|
||||
vmovdqu RB3, (7 * 32)(%rsi);
|
||||
|
||||
vzeroall;
|
||||
FRAME_END
|
||||
ret;
|
||||
SYM_FUNC_END(sm4_aesni_avx2_cbc_dec_blk16)
|
||||
|
||||
/*
|
||||
* void sm4_aesni_avx2_cfb_dec_blk16(const u32 *rk, u8 *dst,
|
||||
* const u8 *src, u8 *iv)
|
||||
*/
|
||||
.align 8
|
||||
SYM_FUNC_START(sm4_aesni_avx2_cfb_dec_blk16)
|
||||
/* input:
|
||||
* %rdi: round key array, CTX
|
||||
* %rsi: dst (16 blocks)
|
||||
* %rdx: src (16 blocks)
|
||||
* %rcx: iv
|
||||
*/
|
||||
FRAME_BEGIN
|
||||
|
||||
vzeroupper;
|
||||
|
||||
/* Load input */
|
||||
vmovdqu (%rcx), RNOTx;
|
||||
vinserti128 $1, (%rdx), RNOT, RA0;
|
||||
vmovdqu (0 * 32 + 16)(%rdx), RA1;
|
||||
vmovdqu (1 * 32 + 16)(%rdx), RA2;
|
||||
vmovdqu (2 * 32 + 16)(%rdx), RA3;
|
||||
vmovdqu (3 * 32 + 16)(%rdx), RB0;
|
||||
vmovdqu (4 * 32 + 16)(%rdx), RB1;
|
||||
vmovdqu (5 * 32 + 16)(%rdx), RB2;
|
||||
vmovdqu (6 * 32 + 16)(%rdx), RB3;
|
||||
|
||||
/* Update IV */
|
||||
vmovdqu (7 * 32 + 16)(%rdx), RNOTx;
|
||||
vmovdqu RNOTx, (%rcx);
|
||||
|
||||
call __sm4_crypt_blk16;
|
||||
|
||||
vpxor (0 * 32)(%rdx), RA0, RA0;
|
||||
vpxor (1 * 32)(%rdx), RA1, RA1;
|
||||
vpxor (2 * 32)(%rdx), RA2, RA2;
|
||||
vpxor (3 * 32)(%rdx), RA3, RA3;
|
||||
vpxor (4 * 32)(%rdx), RB0, RB0;
|
||||
vpxor (5 * 32)(%rdx), RB1, RB1;
|
||||
vpxor (6 * 32)(%rdx), RB2, RB2;
|
||||
vpxor (7 * 32)(%rdx), RB3, RB3;
|
||||
|
||||
vmovdqu RA0, (0 * 32)(%rsi);
|
||||
vmovdqu RA1, (1 * 32)(%rsi);
|
||||
vmovdqu RA2, (2 * 32)(%rsi);
|
||||
vmovdqu RA3, (3 * 32)(%rsi);
|
||||
vmovdqu RB0, (4 * 32)(%rsi);
|
||||
vmovdqu RB1, (5 * 32)(%rsi);
|
||||
vmovdqu RB2, (6 * 32)(%rsi);
|
||||
vmovdqu RB3, (7 * 32)(%rsi);
|
||||
|
||||
vzeroall;
|
||||
FRAME_END
|
||||
ret;
|
||||
SYM_FUNC_END(sm4_aesni_avx2_cfb_dec_blk16)
|
||||
24
arch/x86/crypto/sm4-avx.h
Normal file
24
arch/x86/crypto/sm4-avx.h
Normal file
@@ -0,0 +1,24 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef ASM_X86_SM4_AVX_H
#define ASM_X86_SM4_AVX_H

#include <linux/types.h>
#include <crypto/sm4.h>

/*
 * Only pointers to the request are passed around here, so a forward
 * declaration is enough.  It keeps this header usable no matter which
 * headers the includer pulled in before it.
 */
struct skcipher_request;

/*
 * Signature shared by the multi-block assembly routines:
 * @rk:  round key array
 * @dst: output buffer
 * @src: input buffer
 * @iv:  IV / counter block, updated in place by the routine
 */
typedef void (*sm4_crypt_func)(const u32 *rk, u8 *dst, const u8 *src, u8 *iv);

/* ECB mode. */
int sm4_avx_ecb_encrypt(struct skcipher_request *req);
int sm4_avx_ecb_decrypt(struct skcipher_request *req);

/*
 * CBC mode.  For the decrypt helper, @func is the multi-block routine and
 * @bsize is passed along with it; callers pass the byte size of the batch
 * that @func handles (e.g. SM4_BLOCK_SIZE * 16 with a 16-block routine).
 */
int sm4_cbc_encrypt(struct skcipher_request *req);
int sm4_avx_cbc_decrypt(struct skcipher_request *req,
			unsigned int bsize, sm4_crypt_func func);

/* CFB mode; @bsize and @func as for sm4_avx_cbc_decrypt(). */
int sm4_cfb_encrypt(struct skcipher_request *req);
int sm4_avx_cfb_decrypt(struct skcipher_request *req,
			unsigned int bsize, sm4_crypt_func func);

/* CTR mode; @bsize and @func as for sm4_avx_cbc_decrypt(). */
int sm4_avx_ctr_crypt(struct skcipher_request *req,
			unsigned int bsize, sm4_crypt_func func);

#endif
|
||||
169
arch/x86/crypto/sm4_aesni_avx2_glue.c
Normal file
169
arch/x86/crypto/sm4_aesni_avx2_glue.c
Normal file
@@ -0,0 +1,169 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
/*
|
||||
* SM4 Cipher Algorithm, AES-NI/AVX2 optimized.
|
||||
* as specified in
|
||||
* https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
|
||||
*
|
||||
* Copyright (c) 2021, Alibaba Group.
|
||||
* Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/crypto.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <asm/simd.h>
|
||||
#include <crypto/internal/simd.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <crypto/sm4.h>
|
||||
#include "sm4-avx.h"
|
||||
|
||||
#define SM4_CRYPT16_BLOCK_SIZE (SM4_BLOCK_SIZE * 16)
|
||||
|
||||
asmlinkage void sm4_aesni_avx2_ctr_enc_blk16(const u32 *rk, u8 *dst,
|
||||
const u8 *src, u8 *iv);
|
||||
asmlinkage void sm4_aesni_avx2_cbc_dec_blk16(const u32 *rk, u8 *dst,
|
||||
const u8 *src, u8 *iv);
|
||||
asmlinkage void sm4_aesni_avx2_cfb_dec_blk16(const u32 *rk, u8 *dst,
|
||||
const u8 *src, u8 *iv);
|
||||
|
||||
/*
 * skcipher setkey callback: hand the raw key to sm4_expandkey(), which fills
 * the struct sm4_ctx stored in the transform context, and return its status.
 */
static int sm4_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
			unsigned int key_len)
{
	return sm4_expandkey(crypto_skcipher_ctx(tfm), key, key_len);
}
|
||||
|
||||
static int cbc_decrypt(struct skcipher_request *req)
|
||||
{
|
||||
return sm4_avx_cbc_decrypt(req, SM4_CRYPT16_BLOCK_SIZE,
|
||||
sm4_aesni_avx2_cbc_dec_blk16);
|
||||
}
|
||||
|
||||
|
||||
static int cfb_decrypt(struct skcipher_request *req)
|
||||
{
|
||||
return sm4_avx_cfb_decrypt(req, SM4_CRYPT16_BLOCK_SIZE,
|
||||
sm4_aesni_avx2_cfb_dec_blk16);
|
||||
}
|
||||
|
||||
static int ctr_crypt(struct skcipher_request *req)
|
||||
{
|
||||
return sm4_avx_ctr_crypt(req, SM4_CRYPT16_BLOCK_SIZE,
|
||||
sm4_aesni_avx2_ctr_enc_blk16);
|
||||
}
|
||||
|
||||
static struct skcipher_alg sm4_aesni_avx2_skciphers[] = {
|
||||
{
|
||||
.base = {
|
||||
.cra_name = "__ecb(sm4)",
|
||||
.cra_driver_name = "__ecb-sm4-aesni-avx2",
|
||||
.cra_priority = 500,
|
||||
.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
.cra_blocksize = SM4_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct sm4_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
},
|
||||
.min_keysize = SM4_KEY_SIZE,
|
||||
.max_keysize = SM4_KEY_SIZE,
|
||||
.walksize = 16 * SM4_BLOCK_SIZE,
|
||||
.setkey = sm4_skcipher_setkey,
|
||||
.encrypt = sm4_avx_ecb_encrypt,
|
||||
.decrypt = sm4_avx_ecb_decrypt,
|
||||
}, {
|
||||
.base = {
|
||||
.cra_name = "__cbc(sm4)",
|
||||
.cra_driver_name = "__cbc-sm4-aesni-avx2",
|
||||
.cra_priority = 500,
|
||||
.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
.cra_blocksize = SM4_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct sm4_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
},
|
||||
.min_keysize = SM4_KEY_SIZE,
|
||||
.max_keysize = SM4_KEY_SIZE,
|
||||
.ivsize = SM4_BLOCK_SIZE,
|
||||
.walksize = 16 * SM4_BLOCK_SIZE,
|
||||
.setkey = sm4_skcipher_setkey,
|
||||
.encrypt = sm4_cbc_encrypt,
|
||||
.decrypt = cbc_decrypt,
|
||||
}, {
|
||||
.base = {
|
||||
.cra_name = "__cfb(sm4)",
|
||||
.cra_driver_name = "__cfb-sm4-aesni-avx2",
|
||||
.cra_priority = 500,
|
||||
.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
.cra_blocksize = 1,
|
||||
.cra_ctxsize = sizeof(struct sm4_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
},
|
||||
.min_keysize = SM4_KEY_SIZE,
|
||||
.max_keysize = SM4_KEY_SIZE,
|
||||
.ivsize = SM4_BLOCK_SIZE,
|
||||
.chunksize = SM4_BLOCK_SIZE,
|
||||
.walksize = 16 * SM4_BLOCK_SIZE,
|
||||
.setkey = sm4_skcipher_setkey,
|
||||
.encrypt = sm4_cfb_encrypt,
|
||||
.decrypt = cfb_decrypt,
|
||||
}, {
|
||||
.base = {
|
||||
.cra_name = "__ctr(sm4)",
|
||||
.cra_driver_name = "__ctr-sm4-aesni-avx2",
|
||||
.cra_priority = 500,
|
||||
.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
.cra_blocksize = 1,
|
||||
.cra_ctxsize = sizeof(struct sm4_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
},
|
||||
.min_keysize = SM4_KEY_SIZE,
|
||||
.max_keysize = SM4_KEY_SIZE,
|
||||
.ivsize = SM4_BLOCK_SIZE,
|
||||
.chunksize = SM4_BLOCK_SIZE,
|
||||
.walksize = 16 * SM4_BLOCK_SIZE,
|
||||
.setkey = sm4_skcipher_setkey,
|
||||
.encrypt = ctr_crypt,
|
||||
.decrypt = ctr_crypt,
|
||||
}
|
||||
};
|
||||
|
||||
static struct simd_skcipher_alg *
|
||||
simd_sm4_aesni_avx2_skciphers[ARRAY_SIZE(sm4_aesni_avx2_skciphers)];
|
||||
|
||||
/*
 * Module init.  Bails out with -ENODEV unless the CPU reports AVX, AVX2,
 * AES and OSXSAVE and the SSE/YMM xfeatures are available; otherwise
 * registers the algorithm table through simd_register_skciphers_compat()
 * and returns its result.
 */
static int __init sm4_init(void)
{
	const char *missing;
	bool have_insns;

	have_insns = boot_cpu_has(X86_FEATURE_AVX) &&
		     boot_cpu_has(X86_FEATURE_AVX2) &&
		     boot_cpu_has(X86_FEATURE_AES) &&
		     boot_cpu_has(X86_FEATURE_OSXSAVE);
	if (!have_insns) {
		pr_info("AVX2 or AES-NI instructions are not detected.\n");
		return -ENODEV;
	}

	/* On failure cpu_has_xfeatures() reports the missing feature's name. */
	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
			       &missing)) {
		pr_info("CPU feature '%s' is not supported.\n", missing);
		return -ENODEV;
	}

	return simd_register_skciphers_compat(
			sm4_aesni_avx2_skciphers,
			ARRAY_SIZE(sm4_aesni_avx2_skciphers),
			simd_sm4_aesni_avx2_skciphers);
}
|
||||
|
||||
/* Module exit: unregister the algorithm table and its SIMD wrappers. */
static void __exit sm4_exit(void)
{
	simd_unregister_skciphers(
			sm4_aesni_avx2_skciphers,
			ARRAY_SIZE(sm4_aesni_avx2_skciphers),
			simd_sm4_aesni_avx2_skciphers);
}
|
||||
|
||||
module_init(sm4_init);
|
||||
module_exit(sm4_exit);
|
||||
|
||||
MODULE_LICENSE("GPL v2");
|
||||
MODULE_AUTHOR("Tianjia Zhang <tianjia.zhang@linux.alibaba.com>");
|
||||
MODULE_DESCRIPTION("SM4 Cipher Algorithm, AES-NI/AVX2 optimized");
|
||||
MODULE_ALIAS_CRYPTO("sm4");
|
||||
MODULE_ALIAS_CRYPTO("sm4-aesni-avx2");
|
||||
487
arch/x86/crypto/sm4_aesni_avx_glue.c
Normal file
487
arch/x86/crypto/sm4_aesni_avx_glue.c
Normal file
@@ -0,0 +1,487 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
/*
|
||||
* SM4 Cipher Algorithm, AES-NI/AVX optimized.
|
||||
* as specified in
|
||||
* https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
|
||||
*
|
||||
* Copyright (c) 2021, Alibaba Group.
|
||||
* Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/crypto.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <asm/simd.h>
|
||||
#include <crypto/internal/simd.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <crypto/sm4.h>
|
||||
#include "sm4-avx.h"
|
||||
|
||||
#define SM4_CRYPT8_BLOCK_SIZE (SM4_BLOCK_SIZE * 8)
|
||||
|
||||
asmlinkage void sm4_aesni_avx_crypt4(const u32 *rk, u8 *dst,
|
||||
const u8 *src, int nblocks);
|
||||
asmlinkage void sm4_aesni_avx_crypt8(const u32 *rk, u8 *dst,
|
||||
const u8 *src, int nblocks);
|
||||
asmlinkage void sm4_aesni_avx_ctr_enc_blk8(const u32 *rk, u8 *dst,
|
||||
const u8 *src, u8 *iv);
|
||||
asmlinkage void sm4_aesni_avx_cbc_dec_blk8(const u32 *rk, u8 *dst,
|
||||
const u8 *src, u8 *iv);
|
||||
asmlinkage void sm4_aesni_avx_cfb_dec_blk8(const u32 *rk, u8 *dst,
|
||||
const u8 *src, u8 *iv);
|
||||
|
||||
/*
 * skcipher setkey callback: hand the raw key to sm4_expandkey(), which fills
 * the struct sm4_ctx stored in the transform context, and return its status.
 */
static int sm4_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
			unsigned int key_len)
{
	return sm4_expandkey(crypto_skcipher_ctx(tfm), key, key_len);
}
|
||||
|
||||
static int ecb_do_crypt(struct skcipher_request *req, const u32 *rkey)
|
||||
{
|
||||
struct skcipher_walk walk;
|
||||
unsigned int nbytes;
|
||||
int err;
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, false);
|
||||
|
||||
while ((nbytes = walk.nbytes) > 0) {
|
||||
const u8 *src = walk.src.virt.addr;
|
||||
u8 *dst = walk.dst.virt.addr;
|
||||
|
||||
kernel_fpu_begin();
|
||||
while (nbytes >= SM4_CRYPT8_BLOCK_SIZE) {
|
||||
sm4_aesni_avx_crypt8(rkey, dst, src, 8);
|
||||
dst += SM4_CRYPT8_BLOCK_SIZE;
|
||||
src += SM4_CRYPT8_BLOCK_SIZE;
|
||||
nbytes -= SM4_CRYPT8_BLOCK_SIZE;
|
||||
}
|
||||
while (nbytes >= SM4_BLOCK_SIZE) {
|
||||
unsigned int nblocks = min(nbytes >> 4, 4u);
|
||||
sm4_aesni_avx_crypt4(rkey, dst, src, nblocks);
|
||||
dst += nblocks * SM4_BLOCK_SIZE;
|
||||
src += nblocks * SM4_BLOCK_SIZE;
|
||||
nbytes -= nblocks * SM4_BLOCK_SIZE;
|
||||
}
|
||||
kernel_fpu_end();
|
||||
|
||||
err = skcipher_walk_done(&walk, nbytes);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
int sm4_avx_ecb_encrypt(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
|
||||
return ecb_do_crypt(req, ctx->rkey_enc);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(sm4_avx_ecb_encrypt);
|
||||
|
||||
int sm4_avx_ecb_decrypt(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
|
||||
return ecb_do_crypt(req, ctx->rkey_dec);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(sm4_avx_ecb_decrypt);
|
||||
|
||||
int sm4_cbc_encrypt(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
struct skcipher_walk walk;
|
||||
unsigned int nbytes;
|
||||
int err;
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, false);
|
||||
|
||||
while ((nbytes = walk.nbytes) > 0) {
|
||||
const u8 *iv = walk.iv;
|
||||
const u8 *src = walk.src.virt.addr;
|
||||
u8 *dst = walk.dst.virt.addr;
|
||||
|
||||
while (nbytes >= SM4_BLOCK_SIZE) {
|
||||
crypto_xor_cpy(dst, src, iv, SM4_BLOCK_SIZE);
|
||||
sm4_crypt_block(ctx->rkey_enc, dst, dst);
|
||||
iv = dst;
|
||||
src += SM4_BLOCK_SIZE;
|
||||
dst += SM4_BLOCK_SIZE;
|
||||
nbytes -= SM4_BLOCK_SIZE;
|
||||
}
|
||||
if (iv != walk.iv)
|
||||
memcpy(walk.iv, iv, SM4_BLOCK_SIZE);
|
||||
|
||||
err = skcipher_walk_done(&walk, nbytes);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(sm4_cbc_encrypt);
|
||||
|
||||
/*
 * CBC decryption helper shared by the SIMD glue code.
 *
 * @req:   skcipher request to process
 * @bsize: number of bytes consumed by one call to @func
 * @func:  bulk routine, called as func(rkey_dec, dst, src, walk.iv) while at
 *         least @bsize bytes remain in the current walk step (it is presumably
 *         responsible for updating the IV itself -- this function does not
 *         touch walk.iv in the bulk loop)
 *
 * Full blocks left over after the bulk loop are handled up to eight at a
 * time: they are decrypted into a scratch buffer and then XORed with the
 * preceding ciphertext block (walk.iv for the first block).
 *
 * Returns the status of the last skcipher_walk_*() call.
 */
int sm4_avx_cbc_decrypt(struct skcipher_request *req,
			unsigned int bsize, sm4_crypt_func func)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	unsigned int nbytes;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes) > 0) {
		const u8 *src = walk.src.virt.addr;
		u8 *dst = walk.dst.virt.addr;

		kernel_fpu_begin();

		while (nbytes >= bsize) {
			func(ctx->rkey_dec, dst, src, walk.iv);
			dst += bsize;
			src += bsize;
			nbytes -= bsize;
		}

		while (nbytes >= SM4_BLOCK_SIZE) {
			u8 keystream[SM4_BLOCK_SIZE * 8];
			u8 iv[SM4_BLOCK_SIZE];
			unsigned int nblocks = min(nbytes >> 4, 8u);
			unsigned int len = nblocks * SM4_BLOCK_SIZE;
			unsigned int i;

			sm4_aesni_avx_crypt8(ctx->rkey_dec, keystream,
					     src, nblocks);

			/*
			 * The last ciphertext block becomes the next IV.  Save
			 * it first: dst may alias src, in which case the XOR
			 * pass below overwrites it.
			 */
			memcpy(iv, src + (nblocks - 1) * SM4_BLOCK_SIZE,
			       SM4_BLOCK_SIZE);

			/*
			 * Walk from the last block down to block 1 so that, for
			 * in-place operation, ciphertext block i - 1 is still
			 * intact when block i is written.  Indexing from the
			 * base pointers keeps every pointer inside the mapped
			 * range.
			 */
			for (i = nblocks - 1; i > 0; i--)
				crypto_xor_cpy(dst + i * SM4_BLOCK_SIZE,
					       src + (i - 1) * SM4_BLOCK_SIZE,
					       &keystream[i * SM4_BLOCK_SIZE],
					       SM4_BLOCK_SIZE);

			/* Block 0 chains from the IV carried in the walk. */
			crypto_xor_cpy(dst, walk.iv, keystream, SM4_BLOCK_SIZE);
			memcpy(walk.iv, iv, SM4_BLOCK_SIZE);

			dst += len;
			src += len;
			nbytes -= len;
		}

		kernel_fpu_end();
		err = skcipher_walk_done(&walk, nbytes);
	}

	return err;
}
EXPORT_SYMBOL_GPL(sm4_avx_cbc_decrypt);
|
||||
|
||||
static int cbc_decrypt(struct skcipher_request *req)
|
||||
{
|
||||
return sm4_avx_cbc_decrypt(req, SM4_CRYPT8_BLOCK_SIZE,
|
||||
sm4_aesni_avx_cbc_dec_blk8);
|
||||
}
|
||||
|
||||
int sm4_cfb_encrypt(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
struct skcipher_walk walk;
|
||||
unsigned int nbytes;
|
||||
int err;
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, false);
|
||||
|
||||
while ((nbytes = walk.nbytes) > 0) {
|
||||
u8 keystream[SM4_BLOCK_SIZE];
|
||||
const u8 *iv = walk.iv;
|
||||
const u8 *src = walk.src.virt.addr;
|
||||
u8 *dst = walk.dst.virt.addr;
|
||||
|
||||
while (nbytes >= SM4_BLOCK_SIZE) {
|
||||
sm4_crypt_block(ctx->rkey_enc, keystream, iv);
|
||||
crypto_xor_cpy(dst, src, keystream, SM4_BLOCK_SIZE);
|
||||
iv = dst;
|
||||
src += SM4_BLOCK_SIZE;
|
||||
dst += SM4_BLOCK_SIZE;
|
||||
nbytes -= SM4_BLOCK_SIZE;
|
||||
}
|
||||
if (iv != walk.iv)
|
||||
memcpy(walk.iv, iv, SM4_BLOCK_SIZE);
|
||||
|
||||
/* tail */
|
||||
if (walk.nbytes == walk.total && nbytes > 0) {
|
||||
sm4_crypt_block(ctx->rkey_enc, keystream, walk.iv);
|
||||
crypto_xor_cpy(dst, src, keystream, nbytes);
|
||||
nbytes = 0;
|
||||
}
|
||||
|
||||
err = skcipher_walk_done(&walk, nbytes);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(sm4_cfb_encrypt);
|
||||
|
||||
/*
 * CFB decryption helper shared by the SIMD glue code.
 *
 * @req:   skcipher request to process
 * @bsize: number of bytes consumed by one call to @func
 * @func:  bulk routine, called as func(rkey_enc, dst, src, walk.iv) while at
 *         least @bsize bytes remain in the current walk step
 *
 * Leftover full blocks are handled up to eight at a time by encrypting
 * [IV, C0, ..., C(n-2)] and XORing the result with the ciphertext.  A trailing
 * partial block is only processed on the final walk step.
 *
 * Returns the status of the last skcipher_walk_*() call.
 */
int sm4_avx_cfb_decrypt(struct skcipher_request *req,
			unsigned int bsize, sm4_crypt_func func)
{
	struct sm4_ctx *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
	struct skcipher_walk walk;
	int err = skcipher_walk_virt(&walk, req, false);

	while (walk.nbytes > 0) {
		unsigned int left = walk.nbytes;
		const u8 *in = walk.src.virt.addr;
		u8 *out = walk.dst.virt.addr;

		kernel_fpu_begin();

		for (; left >= bsize; left -= bsize) {
			func(ctx->rkey_enc, out, in, walk.iv);
			out += bsize;
			in += bsize;
		}

		while (left >= SM4_BLOCK_SIZE) {
			u8 pad[SM4_BLOCK_SIZE * 8];
			unsigned int count = min(left >> 4, 8u);
			unsigned int len = count * SM4_BLOCK_SIZE;

			/* Cipher input: current IV followed by C0..C(n-2). */
			memcpy(pad, walk.iv, SM4_BLOCK_SIZE);
			if (count > 1)
				memcpy(&pad[SM4_BLOCK_SIZE], in,
				       (count - 1) * SM4_BLOCK_SIZE);

			/* Last ciphertext block is the IV for what follows. */
			memcpy(walk.iv, in + (count - 1) * SM4_BLOCK_SIZE,
			       SM4_BLOCK_SIZE);

			sm4_aesni_avx_crypt8(ctx->rkey_enc, pad, pad, count);

			crypto_xor_cpy(out, in, pad, len);
			out += len;
			in += len;
			left -= len;
		}

		kernel_fpu_end();

		/* tail */
		if (left > 0 && walk.nbytes == walk.total) {
			u8 last[SM4_BLOCK_SIZE];

			sm4_crypt_block(ctx->rkey_enc, last, walk.iv);
			crypto_xor_cpy(out, in, last, left);
			left = 0;
		}

		err = skcipher_walk_done(&walk, left);
	}

	return err;
}
EXPORT_SYMBOL_GPL(sm4_avx_cfb_decrypt);
|
||||
|
||||
static int cfb_decrypt(struct skcipher_request *req)
|
||||
{
|
||||
return sm4_avx_cfb_decrypt(req, SM4_CRYPT8_BLOCK_SIZE,
|
||||
sm4_aesni_avx_cfb_dec_blk8);
|
||||
}
|
||||
|
||||
/*
 * CTR mode helper shared by the SIMD glue code (the same routine serves
 * encryption and decryption).
 *
 * @req:   skcipher request to process
 * @bsize: number of bytes consumed by one call to @func
 * @func:  bulk routine, called as func(rkey_enc, dst, src, walk.iv) while at
 *         least @bsize bytes remain in the current walk step
 *
 * Leftover full blocks are handled up to eight at a time: successive counter
 * values are collected into a scratch buffer, encrypted, and XORed with the
 * input.  A trailing partial block is only processed on the final walk step.
 *
 * Returns the status of the last skcipher_walk_*() call.
 */
int sm4_avx_ctr_crypt(struct skcipher_request *req,
		      unsigned int bsize, sm4_crypt_func func)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	unsigned int nbytes;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes) > 0) {
		const u8 *src = walk.src.virt.addr;
		u8 *dst = walk.dst.virt.addr;

		kernel_fpu_begin();

		while (nbytes >= bsize) {
			func(ctx->rkey_enc, dst, src, walk.iv);
			dst += bsize;
			src += bsize;
			nbytes -= bsize;
		}

		while (nbytes >= SM4_BLOCK_SIZE) {
			u8 keystream[SM4_BLOCK_SIZE * 8];
			unsigned int nblocks = min(nbytes >> 4, 8u);
			unsigned int i;	/* unsigned to match nblocks */

			/* Snapshot one counter value per block. */
			for (i = 0; i < nblocks; i++) {
				memcpy(&keystream[i * SM4_BLOCK_SIZE],
				       walk.iv, SM4_BLOCK_SIZE);
				crypto_inc(walk.iv, SM4_BLOCK_SIZE);
			}
			sm4_aesni_avx_crypt8(ctx->rkey_enc, keystream,
					     keystream, nblocks);

			crypto_xor_cpy(dst, src, keystream,
				       nblocks * SM4_BLOCK_SIZE);
			dst += nblocks * SM4_BLOCK_SIZE;
			src += nblocks * SM4_BLOCK_SIZE;
			nbytes -= nblocks * SM4_BLOCK_SIZE;
		}

		kernel_fpu_end();

		/* tail */
		if (walk.nbytes == walk.total && nbytes > 0) {
			u8 keystream[SM4_BLOCK_SIZE];

			memcpy(keystream, walk.iv, SM4_BLOCK_SIZE);
			crypto_inc(walk.iv, SM4_BLOCK_SIZE);

			sm4_crypt_block(ctx->rkey_enc, keystream, keystream);

			/*
			 * dst/src are not advanced here: they are re-read from
			 * the walk on the next iteration.
			 */
			crypto_xor_cpy(dst, src, keystream, nbytes);
			nbytes = 0;
		}

		err = skcipher_walk_done(&walk, nbytes);
	}

	return err;
}
EXPORT_SYMBOL_GPL(sm4_avx_ctr_crypt);
|
||||
|
||||
static int ctr_crypt(struct skcipher_request *req)
|
||||
{
|
||||
return sm4_avx_ctr_crypt(req, SM4_CRYPT8_BLOCK_SIZE,
|
||||
sm4_aesni_avx_ctr_enc_blk8);
|
||||
}
|
||||
|
||||
static struct skcipher_alg sm4_aesni_avx_skciphers[] = {
|
||||
{
|
||||
.base = {
|
||||
.cra_name = "__ecb(sm4)",
|
||||
.cra_driver_name = "__ecb-sm4-aesni-avx",
|
||||
.cra_priority = 400,
|
||||
.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
.cra_blocksize = SM4_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct sm4_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
},
|
||||
.min_keysize = SM4_KEY_SIZE,
|
||||
.max_keysize = SM4_KEY_SIZE,
|
||||
.walksize = 8 * SM4_BLOCK_SIZE,
|
||||
.setkey = sm4_skcipher_setkey,
|
||||
.encrypt = sm4_avx_ecb_encrypt,
|
||||
.decrypt = sm4_avx_ecb_decrypt,
|
||||
}, {
|
||||
.base = {
|
||||
.cra_name = "__cbc(sm4)",
|
||||
.cra_driver_name = "__cbc-sm4-aesni-avx",
|
||||
.cra_priority = 400,
|
||||
.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
.cra_blocksize = SM4_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct sm4_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
},
|
||||
.min_keysize = SM4_KEY_SIZE,
|
||||
.max_keysize = SM4_KEY_SIZE,
|
||||
.ivsize = SM4_BLOCK_SIZE,
|
||||
.walksize = 8 * SM4_BLOCK_SIZE,
|
||||
.setkey = sm4_skcipher_setkey,
|
||||
.encrypt = sm4_cbc_encrypt,
|
||||
.decrypt = cbc_decrypt,
|
||||
}, {
|
||||
.base = {
|
||||
.cra_name = "__cfb(sm4)",
|
||||
.cra_driver_name = "__cfb-sm4-aesni-avx",
|
||||
.cra_priority = 400,
|
||||
.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
.cra_blocksize = 1,
|
||||
.cra_ctxsize = sizeof(struct sm4_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
},
|
||||
.min_keysize = SM4_KEY_SIZE,
|
||||
.max_keysize = SM4_KEY_SIZE,
|
||||
.ivsize = SM4_BLOCK_SIZE,
|
||||
.chunksize = SM4_BLOCK_SIZE,
|
||||
.walksize = 8 * SM4_BLOCK_SIZE,
|
||||
.setkey = sm4_skcipher_setkey,
|
||||
.encrypt = sm4_cfb_encrypt,
|
||||
.decrypt = cfb_decrypt,
|
||||
}, {
|
||||
.base = {
|
||||
.cra_name = "__ctr(sm4)",
|
||||
.cra_driver_name = "__ctr-sm4-aesni-avx",
|
||||
.cra_priority = 400,
|
||||
.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
.cra_blocksize = 1,
|
||||
.cra_ctxsize = sizeof(struct sm4_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
},
|
||||
.min_keysize = SM4_KEY_SIZE,
|
||||
.max_keysize = SM4_KEY_SIZE,
|
||||
.ivsize = SM4_BLOCK_SIZE,
|
||||
.chunksize = SM4_BLOCK_SIZE,
|
||||
.walksize = 8 * SM4_BLOCK_SIZE,
|
||||
.setkey = sm4_skcipher_setkey,
|
||||
.encrypt = ctr_crypt,
|
||||
.decrypt = ctr_crypt,
|
||||
}
|
||||
};
|
||||
|
||||
static struct simd_skcipher_alg *
|
||||
simd_sm4_aesni_avx_skciphers[ARRAY_SIZE(sm4_aesni_avx_skciphers)];
|
||||
|
||||
/*
 * Module init: refuse to load (-ENODEV) unless the boot CPU reports AVX, AES
 * and OSXSAVE and the SSE/YMM xfeatures are available; otherwise register the
 * skcipher algorithms together with their SIMD wrappers.
 */
static int __init sm4_init(void)
{
	const u64 xfeatures = XFEATURE_MASK_SSE | XFEATURE_MASK_YMM;
	const char *feature_name;

	if (!(boot_cpu_has(X86_FEATURE_AVX) &&
	      boot_cpu_has(X86_FEATURE_AES) &&
	      boot_cpu_has(X86_FEATURE_OSXSAVE))) {
		pr_info("AVX or AES-NI instructions are not detected.\n");
		return -ENODEV;
	}

	if (!cpu_has_xfeatures(xfeatures, &feature_name)) {
		pr_info("CPU feature '%s' is not supported.\n", feature_name);
		return -ENODEV;
	}

	return simd_register_skciphers_compat(sm4_aesni_avx_skciphers,
					ARRAY_SIZE(sm4_aesni_avx_skciphers),
					simd_sm4_aesni_avx_skciphers);
}
|
||||
|
||||
/* Module exit: unregister everything sm4_init() registered. */
static void __exit sm4_exit(void)
{
	const int count = ARRAY_SIZE(sm4_aesni_avx_skciphers);

	simd_unregister_skciphers(sm4_aesni_avx_skciphers, count,
				  simd_sm4_aesni_avx_skciphers);
}
|
||||
|
||||
module_init(sm4_init);
|
||||
module_exit(sm4_exit);
|
||||
|
||||
MODULE_LICENSE("GPL v2");
|
||||
MODULE_AUTHOR("Tianjia Zhang <tianjia.zhang@linux.alibaba.com>");
|
||||
MODULE_DESCRIPTION("SM4 Cipher Algorithm, AES-NI/AVX optimized");
|
||||
MODULE_ALIAS_CRYPTO("sm4");
|
||||
MODULE_ALIAS_CRYPTO("sm4-aesni-avx");
|
||||
@@ -1547,6 +1547,7 @@ config CRYPTO_SERPENT_AVX2_X86_64
|
||||
config CRYPTO_SM4
|
||||
tristate "SM4 cipher algorithm"
|
||||
select CRYPTO_ALGAPI
|
||||
select CRYPTO_LIB_SM4
|
||||
help
|
||||
SM4 cipher algorithms (OSCCA GB/T 32907-2016).
|
||||
|
||||
@@ -1569,6 +1570,49 @@ config CRYPTO_SM4
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config CRYPTO_SM4_AESNI_AVX_X86_64
|
||||
tristate "SM4 cipher algorithm (x86_64/AES-NI/AVX)"
|
||||
depends on X86 && 64BIT
|
||||
select CRYPTO_SKCIPHER
|
||||
select CRYPTO_SIMD
|
||||
select CRYPTO_ALGAPI
|
||||
select CRYPTO_LIB_SM4
|
||||
help
|
||||
SM4 cipher algorithms (OSCCA GB/T 32907-2016) (x86_64/AES-NI/AVX).
|
||||
|
||||
SM4 (GB/T 32907-2016) is a cryptographic standard issued by the
|
||||
Organization of State Commercial Administration of China (OSCCA)
|
||||
as an authorized cryptographic algorithm for use within China.
|
||||
|
||||
This is an optimized SM4 implementation using the AES-NI/AVX/x86_64
|
||||
instruction set for block cipher. Through two affine transforms,
|
||||
we can use the AES S-Box to simulate the SM4 S-Box to achieve the
|
||||
effect of instruction acceleration.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config CRYPTO_SM4_AESNI_AVX2_X86_64
|
||||
tristate "SM4 cipher algorithm (x86_64/AES-NI/AVX2)"
|
||||
depends on X86 && 64BIT
|
||||
select CRYPTO_SKCIPHER
|
||||
select CRYPTO_SIMD
|
||||
select CRYPTO_ALGAPI
|
||||
select CRYPTO_LIB_SM4
|
||||
select CRYPTO_SM4_AESNI_AVX_X86_64
|
||||
help
|
||||
SM4 cipher algorithms (OSCCA GB/T 32907-2016) (x86_64/AES-NI/AVX2).
|
||||
|
||||
SM4 (GB/T 32907-2016) is a cryptographic standard issued by the
|
||||
Organization of State Commercial Administration of China (OSCCA)
|
||||
as an authorized cryptographic algorithm for use within China.
|
||||
|
||||
This is an optimized SM4 implementation using the AES-NI/AVX2/x86_64
|
||||
instruction set for block cipher. Through two affine transforms,
|
||||
we can use the AES S-Box to simulate the SM4 S-Box to achieve the
|
||||
effect of instruction acceleration.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config CRYPTO_TEA
|
||||
tristate "TEA, XTEA and XETA cipher algorithms"
|
||||
depends on CRYPTO_USER_API_ENABLE_OBSOLETE
|
||||
|
||||
@@ -74,7 +74,6 @@ obj-$(CONFIG_CRYPTO_NULL2) += crypto_null.o
|
||||
obj-$(CONFIG_CRYPTO_MD4) += md4.o
|
||||
obj-$(CONFIG_CRYPTO_MD5) += md5.o
|
||||
obj-$(CONFIG_CRYPTO_RMD160) += rmd160.o
|
||||
obj-$(CONFIG_CRYPTO_RMD320) += rmd320.o
|
||||
obj-$(CONFIG_CRYPTO_SHA1) += sha1_generic.o
|
||||
obj-$(CONFIG_CRYPTO_SHA256) += sha256_generic.o
|
||||
obj-$(CONFIG_CRYPTO_SHA512) += sha512_generic.o
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
#define _CRYPTO_ECC_H
|
||||
|
||||
#include <crypto/ecc_curve.h>
|
||||
#include <asm/unaligned.h>
|
||||
|
||||
/* One digit is u64 qword. */
|
||||
#define ECC_CURVE_NIST_P192_DIGITS 3
|
||||
@@ -46,13 +47,13 @@
|
||||
* @out: Output array
|
||||
* @ndigits: Number of digits to copy
|
||||
*/
|
||||
static inline void ecc_swap_digits(const u64 *in, u64 *out, unsigned int ndigits)
|
||||
static inline void ecc_swap_digits(const void *in, u64 *out, unsigned int ndigits)
|
||||
{
|
||||
const __be64 *src = (__force __be64 *)in;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ndigits; i++)
|
||||
out[i] = be64_to_cpu(src[ndigits - 1 - i]);
|
||||
out[i] = get_unaligned_be64(&src[ndigits - 1 - i]);
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -143,9 +143,6 @@ sha512_transform(u64 *state, const u8 *input)
|
||||
|
||||
state[0] += a; state[1] += b; state[2] += c; state[3] += d;
|
||||
state[4] += e; state[5] += f; state[6] += g; state[7] += h;
|
||||
|
||||
/* erase our data */
|
||||
a = b = c = d = e = f = g = h = t1 = t2 = 0;
|
||||
}
|
||||
|
||||
static void sha512_generic_block_fn(struct sha512_state *sst, u8 const *src,
|
||||
|
||||
@@ -431,7 +431,7 @@ static int skcipher_copy_iv(struct skcipher_walk *walk)
|
||||
|
||||
static int skcipher_walk_first(struct skcipher_walk *walk)
|
||||
{
|
||||
if (WARN_ON_ONCE(in_irq()))
|
||||
if (WARN_ON_ONCE(in_hardirq()))
|
||||
return -EDEADLK;
|
||||
|
||||
walk->buffer = NULL;
|
||||
|
||||
@@ -16,191 +16,43 @@
|
||||
#include <asm/byteorder.h>
|
||||
#include <asm/unaligned.h>
|
||||
|
||||
static const u32 fk[4] = {
|
||||
0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc
|
||||
};
|
||||
|
||||
static const u8 sbox[256] = {
|
||||
0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7,
|
||||
0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05,
|
||||
0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3,
|
||||
0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
|
||||
0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a,
|
||||
0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62,
|
||||
0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95,
|
||||
0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6,
|
||||
0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba,
|
||||
0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8,
|
||||
0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b,
|
||||
0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35,
|
||||
0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2,
|
||||
0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87,
|
||||
0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52,
|
||||
0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e,
|
||||
0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5,
|
||||
0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1,
|
||||
0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55,
|
||||
0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3,
|
||||
0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60,
|
||||
0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f,
|
||||
0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f,
|
||||
0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51,
|
||||
0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f,
|
||||
0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8,
|
||||
0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd,
|
||||
0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0,
|
||||
0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e,
|
||||
0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84,
|
||||
0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20,
|
||||
0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48
|
||||
};
|
||||
|
||||
static const u32 ck[] = {
|
||||
0x00070e15, 0x1c232a31, 0x383f464d, 0x545b6269,
|
||||
0x70777e85, 0x8c939aa1, 0xa8afb6bd, 0xc4cbd2d9,
|
||||
0xe0e7eef5, 0xfc030a11, 0x181f262d, 0x343b4249,
|
||||
0x50575e65, 0x6c737a81, 0x888f969d, 0xa4abb2b9,
|
||||
0xc0c7ced5, 0xdce3eaf1, 0xf8ff060d, 0x141b2229,
|
||||
0x30373e45, 0x4c535a61, 0x686f767d, 0x848b9299,
|
||||
0xa0a7aeb5, 0xbcc3cad1, 0xd8dfe6ed, 0xf4fb0209,
|
||||
0x10171e25, 0x2c333a41, 0x484f565d, 0x646b7279
|
||||
};
|
||||
|
||||
/* Replace each of the four bytes of @x with its sbox[] entry. */
static u32 sm4_t_non_lin_sub(u32 x)
{
	u8 *bytes = (u8 *)&x;

	bytes[0] = sbox[bytes[0]];
	bytes[1] = sbox[bytes[1]];
	bytes[2] = sbox[bytes[2]];
	bytes[3] = sbox[bytes[3]];

	return x;
}
|
||||
|
||||
/* Key-schedule linear step: x ^ (x <<< 13) ^ (x <<< 23). */
static u32 sm4_key_lin_sub(u32 x)
{
	u32 mixed = x;

	mixed ^= rol32(x, 13);
	mixed ^= rol32(x, 23);

	return mixed;
}
|
||||
|
||||
/* Round linear step: x ^ (x <<< 2) ^ (x <<< 10) ^ (x <<< 18) ^ (x <<< 24). */
static u32 sm4_enc_lin_sub(u32 x)
{
	u32 mixed = x;

	mixed ^= rol32(x, 2);
	mixed ^= rol32(x, 10);
	mixed ^= rol32(x, 18);
	mixed ^= rol32(x, 24);

	return mixed;
}
|
||||
|
||||
/* Key-schedule transform: byte substitution, then the key linear step. */
static u32 sm4_key_sub(u32 x)
{
	u32 substituted = sm4_t_non_lin_sub(x);

	return sm4_key_lin_sub(substituted);
}
|
||||
|
||||
/* Round transform: byte substitution, then the round linear step. */
static u32 sm4_enc_sub(u32 x)
{
	u32 substituted = sm4_t_non_lin_sub(x);

	return sm4_enc_lin_sub(substituted);
}
|
||||
|
||||
/*
 * One cipher round: combine x[1..3] with round key @rk, run the result
 * through the round transform and XOR it into x[0].  @x is not modified.
 */
static u32 sm4_round(const u32 *x, const u32 rk)
{
	u32 mix = x[1] ^ x[2] ^ x[3] ^ rk;

	return x[0] ^ sm4_enc_sub(mix);
}
|
||||
|
||||
|
||||
/**
|
||||
* crypto_sm4_expand_key - Expands the SM4 key as described in GB/T 32907-2016
|
||||
* @ctx: The location where the computed key will be stored.
|
||||
* @in_key: The supplied key.
|
||||
* @key_len: The length of the supplied key.
|
||||
*
|
||||
* Returns 0 on success. The function fails only if an invalid key size (or
|
||||
* pointer) is supplied.
|
||||
*/
|
||||
int crypto_sm4_expand_key(struct crypto_sm4_ctx *ctx, const u8 *in_key,
|
||||
unsigned int key_len)
|
||||
{
|
||||
u32 rk[4], t;
|
||||
const u32 *key = (u32 *)in_key;
|
||||
int i;
|
||||
|
||||
if (key_len != SM4_KEY_SIZE)
|
||||
return -EINVAL;
|
||||
|
||||
for (i = 0; i < 4; ++i)
|
||||
rk[i] = get_unaligned_be32(&key[i]) ^ fk[i];
|
||||
|
||||
for (i = 0; i < 32; ++i) {
|
||||
t = rk[0] ^ sm4_key_sub(rk[1] ^ rk[2] ^ rk[3] ^ ck[i]);
|
||||
ctx->rkey_enc[i] = t;
|
||||
rk[0] = rk[1];
|
||||
rk[1] = rk[2];
|
||||
rk[2] = rk[3];
|
||||
rk[3] = t;
|
||||
}
|
||||
|
||||
for (i = 0; i < 32; ++i)
|
||||
ctx->rkey_dec[i] = ctx->rkey_enc[31 - i];
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_sm4_expand_key);
|
||||
|
||||
/**
|
||||
* crypto_sm4_set_key - Set the SM4 key.
|
||||
* sm4_setkey - Set the SM4 key.
|
||||
* @tfm: The %crypto_tfm that is used in the context.
|
||||
* @in_key: The input key.
|
||||
* @key_len: The size of the key.
|
||||
*
|
||||
* This function uses crypto_sm4_expand_key() to expand the key.
|
||||
* &crypto_sm4_ctx _must_ be the private data embedded in @tfm which is
|
||||
* This function uses sm4_expandkey() to expand the key.
|
||||
* &sm4_ctx _must_ be the private data embedded in @tfm which is
|
||||
* retrieved with crypto_tfm_ctx().
|
||||
*
|
||||
* Return: 0 on success; -EINVAL on failure (only happens for bad key lengths)
|
||||
*/
|
||||
int crypto_sm4_set_key(struct crypto_tfm *tfm, const u8 *in_key,
|
||||
static int sm4_setkey(struct crypto_tfm *tfm, const u8 *in_key,
|
||||
unsigned int key_len)
|
||||
{
|
||||
struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
|
||||
return crypto_sm4_expand_key(ctx, in_key, key_len);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_sm4_set_key);
|
||||
|
||||
static void sm4_do_crypt(const u32 *rk, u32 *out, const u32 *in)
|
||||
{
|
||||
u32 x[4], i, t;
|
||||
|
||||
for (i = 0; i < 4; ++i)
|
||||
x[i] = get_unaligned_be32(&in[i]);
|
||||
|
||||
for (i = 0; i < 32; ++i) {
|
||||
t = sm4_round(x, rk[i]);
|
||||
x[0] = x[1];
|
||||
x[1] = x[2];
|
||||
x[2] = x[3];
|
||||
x[3] = t;
|
||||
}
|
||||
|
||||
for (i = 0; i < 4; ++i)
|
||||
put_unaligned_be32(x[3 - i], &out[i]);
|
||||
return sm4_expandkey(ctx, in_key, key_len);
|
||||
}
|
||||
|
||||
/* encrypt a block of text */
|
||||
|
||||
void crypto_sm4_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
|
||||
static void sm4_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
|
||||
{
|
||||
const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
|
||||
sm4_do_crypt(ctx->rkey_enc, (u32 *)out, (u32 *)in);
|
||||
sm4_crypt_block(ctx->rkey_enc, out, in);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_sm4_encrypt);
|
||||
|
||||
/* decrypt a block of text */
|
||||
|
||||
void crypto_sm4_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
|
||||
static void sm4_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
|
||||
{
|
||||
const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
|
||||
sm4_do_crypt(ctx->rkey_dec, (u32 *)out, (u32 *)in);
|
||||
sm4_crypt_block(ctx->rkey_dec, out, in);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_sm4_decrypt);
|
||||
|
||||
static struct crypto_alg sm4_alg = {
|
||||
.cra_name = "sm4",
|
||||
@@ -208,15 +60,15 @@ static struct crypto_alg sm4_alg = {
|
||||
.cra_priority = 100,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
|
||||
.cra_blocksize = SM4_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct crypto_sm4_ctx),
|
||||
.cra_ctxsize = sizeof(struct sm4_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_u = {
|
||||
.cipher = {
|
||||
.cia_min_keysize = SM4_KEY_SIZE,
|
||||
.cia_max_keysize = SM4_KEY_SIZE,
|
||||
.cia_setkey = crypto_sm4_set_key,
|
||||
.cia_encrypt = crypto_sm4_encrypt,
|
||||
.cia_decrypt = crypto_sm4_decrypt
|
||||
.cia_setkey = sm4_setkey,
|
||||
.cia_encrypt = sm4_encrypt,
|
||||
.cia_decrypt = sm4_decrypt
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
100
crypto/tcrypt.c
100
crypto/tcrypt.c
@@ -77,7 +77,7 @@ static const char *check[] = {
|
||||
NULL
|
||||
};
|
||||
|
||||
static const int block_sizes[] = { 16, 64, 256, 1024, 1420, 4096, 0 };
|
||||
static const int block_sizes[] = { 16, 64, 128, 256, 1024, 1420, 4096, 0 };
|
||||
static const int aead_sizes[] = { 16, 64, 256, 512, 1024, 1420, 4096, 8192, 0 };
|
||||
|
||||
#define XBUFSIZE 8
|
||||
@@ -290,6 +290,11 @@ static void test_mb_aead_speed(const char *algo, int enc, int secs,
|
||||
}
|
||||
|
||||
ret = crypto_aead_setauthsize(tfm, authsize);
|
||||
if (ret) {
|
||||
pr_err("alg: aead: Failed to setauthsize for %s: %d\n", algo,
|
||||
ret);
|
||||
goto out_free_tfm;
|
||||
}
|
||||
|
||||
for (i = 0; i < num_mb; ++i)
|
||||
if (testmgr_alloc_buf(data[i].xbuf)) {
|
||||
@@ -315,7 +320,7 @@ static void test_mb_aead_speed(const char *algo, int enc, int secs,
|
||||
for (i = 0; i < num_mb; ++i) {
|
||||
data[i].req = aead_request_alloc(tfm, GFP_KERNEL);
|
||||
if (!data[i].req) {
|
||||
pr_err("alg: skcipher: Failed to allocate request for %s\n",
|
||||
pr_err("alg: aead: Failed to allocate request for %s\n",
|
||||
algo);
|
||||
while (i--)
|
||||
aead_request_free(data[i].req);
|
||||
@@ -567,13 +572,19 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs,
|
||||
sgout = &sg[9];
|
||||
|
||||
tfm = crypto_alloc_aead(algo, 0, 0);
|
||||
|
||||
if (IS_ERR(tfm)) {
|
||||
pr_err("alg: aead: Failed to load transform for %s: %ld\n", algo,
|
||||
PTR_ERR(tfm));
|
||||
goto out_notfm;
|
||||
}
|
||||
|
||||
ret = crypto_aead_setauthsize(tfm, authsize);
|
||||
if (ret) {
|
||||
pr_err("alg: aead: Failed to setauthsize for %s: %d\n", algo,
|
||||
ret);
|
||||
goto out_noreq;
|
||||
}
|
||||
|
||||
crypto_init_wait(&wait);
|
||||
printk(KERN_INFO "\ntesting speed of %s (%s) %s\n", algo,
|
||||
get_driver_name(crypto_aead, tfm), e);
|
||||
@@ -611,8 +622,13 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs,
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
ret = crypto_aead_setkey(tfm, key, *keysize);
|
||||
ret = crypto_aead_setauthsize(tfm, authsize);
|
||||
if (ret) {
|
||||
pr_err("setkey() failed flags=%x: %d\n",
|
||||
crypto_aead_get_flags(tfm), ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
iv_len = crypto_aead_ivsize(tfm);
|
||||
if (iv_len)
|
||||
@@ -622,15 +638,8 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs,
|
||||
printk(KERN_INFO "test %u (%d bit key, %d byte blocks): ",
|
||||
i, *keysize * 8, bs);
|
||||
|
||||
|
||||
memset(tvmem[0], 0xff, PAGE_SIZE);
|
||||
|
||||
if (ret) {
|
||||
pr_err("setkey() failed flags=%x\n",
|
||||
crypto_aead_get_flags(tfm));
|
||||
goto out;
|
||||
}
|
||||
|
||||
sg_init_aead(sg, xbuf, bs + (enc ? 0 : authsize),
|
||||
assoc, aad_size);
|
||||
|
||||
@@ -1907,6 +1916,14 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
|
||||
ret += tcrypt_test("streebog512");
|
||||
break;
|
||||
|
||||
case 55:
|
||||
ret += tcrypt_test("gcm(sm4)");
|
||||
break;
|
||||
|
||||
case 56:
|
||||
ret += tcrypt_test("ccm(sm4)");
|
||||
break;
|
||||
|
||||
case 100:
|
||||
ret += tcrypt_test("hmac(md5)");
|
||||
break;
|
||||
@@ -1998,6 +2015,15 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
|
||||
case 157:
|
||||
ret += tcrypt_test("authenc(hmac(sha1),ecb(cipher_null))");
|
||||
break;
|
||||
|
||||
case 158:
|
||||
ret += tcrypt_test("cbcmac(sm4)");
|
||||
break;
|
||||
|
||||
case 159:
|
||||
ret += tcrypt_test("cmac(sm4)");
|
||||
break;
|
||||
|
||||
case 181:
|
||||
ret += tcrypt_test("authenc(hmac(sha1),cbc(des))");
|
||||
break;
|
||||
@@ -2031,6 +2057,7 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
|
||||
case 191:
|
||||
ret += tcrypt_test("ecb(sm4)");
|
||||
ret += tcrypt_test("cbc(sm4)");
|
||||
ret += tcrypt_test("cfb(sm4)");
|
||||
ret += tcrypt_test("ctr(sm4)");
|
||||
break;
|
||||
case 200:
|
||||
@@ -2289,6 +2316,10 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
|
||||
speed_template_16);
|
||||
test_cipher_speed("cbc(sm4)", DECRYPT, sec, NULL, 0,
|
||||
speed_template_16);
|
||||
test_cipher_speed("cfb(sm4)", ENCRYPT, sec, NULL, 0,
|
||||
speed_template_16);
|
||||
test_cipher_speed("cfb(sm4)", DECRYPT, sec, NULL, 0,
|
||||
speed_template_16);
|
||||
test_cipher_speed("ctr(sm4)", ENCRYPT, sec, NULL, 0,
|
||||
speed_template_16);
|
||||
test_cipher_speed("ctr(sm4)", DECRYPT, sec, NULL, 0,
|
||||
@@ -2322,6 +2353,34 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
|
||||
NULL, 0, 16, 8, speed_template_16);
|
||||
break;
|
||||
|
||||
case 222:
|
||||
test_aead_speed("gcm(sm4)", ENCRYPT, sec,
|
||||
NULL, 0, 16, 8, speed_template_16);
|
||||
test_aead_speed("gcm(sm4)", DECRYPT, sec,
|
||||
NULL, 0, 16, 8, speed_template_16);
|
||||
break;
|
||||
|
||||
case 223:
|
||||
test_aead_speed("rfc4309(ccm(sm4))", ENCRYPT, sec,
|
||||
NULL, 0, 16, 16, aead_speed_template_19);
|
||||
test_aead_speed("rfc4309(ccm(sm4))", DECRYPT, sec,
|
||||
NULL, 0, 16, 16, aead_speed_template_19);
|
||||
break;
|
||||
|
||||
case 224:
|
||||
test_mb_aead_speed("gcm(sm4)", ENCRYPT, sec, NULL, 0, 16, 8,
|
||||
speed_template_16, num_mb);
|
||||
test_mb_aead_speed("gcm(sm4)", DECRYPT, sec, NULL, 0, 16, 8,
|
||||
speed_template_16, num_mb);
|
||||
break;
|
||||
|
||||
case 225:
|
||||
test_mb_aead_speed("rfc4309(ccm(sm4))", ENCRYPT, sec, NULL, 0,
|
||||
16, 16, aead_speed_template_19, num_mb);
|
||||
test_mb_aead_speed("rfc4309(ccm(sm4))", DECRYPT, sec, NULL, 0,
|
||||
16, 16, aead_speed_template_19, num_mb);
|
||||
break;
|
||||
|
||||
case 300:
|
||||
if (alg) {
|
||||
test_hash_speed(alg, sec, generic_hash_speed_template);
|
||||
@@ -2757,6 +2816,25 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
|
||||
speed_template_8_32);
|
||||
break;
|
||||
|
||||
case 518:
|
||||
test_acipher_speed("ecb(sm4)", ENCRYPT, sec, NULL, 0,
|
||||
speed_template_16);
|
||||
test_acipher_speed("ecb(sm4)", DECRYPT, sec, NULL, 0,
|
||||
speed_template_16);
|
||||
test_acipher_speed("cbc(sm4)", ENCRYPT, sec, NULL, 0,
|
||||
speed_template_16);
|
||||
test_acipher_speed("cbc(sm4)", DECRYPT, sec, NULL, 0,
|
||||
speed_template_16);
|
||||
test_acipher_speed("cfb(sm4)", ENCRYPT, sec, NULL, 0,
|
||||
speed_template_16);
|
||||
test_acipher_speed("cfb(sm4)", DECRYPT, sec, NULL, 0,
|
||||
speed_template_16);
|
||||
test_acipher_speed("ctr(sm4)", ENCRYPT, sec, NULL, 0,
|
||||
speed_template_16);
|
||||
test_acipher_speed("ctr(sm4)", DECRYPT, sec, NULL, 0,
|
||||
speed_template_16);
|
||||
break;
|
||||
|
||||
case 600:
|
||||
test_mb_skcipher_speed("ecb(aes)", ENCRYPT, sec, NULL, 0,
|
||||
speed_template_16_24_32, num_mb);
|
||||
|
||||
@@ -4450,6 +4450,12 @@ static const struct alg_test_desc alg_test_descs[] = {
|
||||
.suite = {
|
||||
.hash = __VECS(aes_cbcmac_tv_template)
|
||||
}
|
||||
}, {
|
||||
.alg = "cbcmac(sm4)",
|
||||
.test = alg_test_hash,
|
||||
.suite = {
|
||||
.hash = __VECS(sm4_cbcmac_tv_template)
|
||||
}
|
||||
}, {
|
||||
.alg = "ccm(aes)",
|
||||
.generic_driver = "ccm_base(ctr(aes-generic),cbcmac(aes-generic))",
|
||||
@@ -4461,6 +4467,16 @@ static const struct alg_test_desc alg_test_descs[] = {
|
||||
.einval_allowed = 1,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
.alg = "ccm(sm4)",
|
||||
.generic_driver = "ccm_base(ctr(sm4-generic),cbcmac(sm4-generic))",
|
||||
.test = alg_test_aead,
|
||||
.suite = {
|
||||
.aead = {
|
||||
____VECS(sm4_ccm_tv_template),
|
||||
.einval_allowed = 1,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
.alg = "cfb(aes)",
|
||||
.test = alg_test_skcipher,
|
||||
@@ -4494,6 +4510,12 @@ static const struct alg_test_desc alg_test_descs[] = {
|
||||
.suite = {
|
||||
.hash = __VECS(des3_ede_cmac64_tv_template)
|
||||
}
|
||||
}, {
|
||||
.alg = "cmac(sm4)",
|
||||
.test = alg_test_hash,
|
||||
.suite = {
|
||||
.hash = __VECS(sm4_cmac128_tv_template)
|
||||
}
|
||||
}, {
|
||||
.alg = "compress_null",
|
||||
.test = alg_test_null,
|
||||
@@ -4967,6 +4989,13 @@ static const struct alg_test_desc alg_test_descs[] = {
|
||||
.suite = {
|
||||
.aead = __VECS(aes_gcm_tv_template)
|
||||
}
|
||||
}, {
|
||||
.alg = "gcm(sm4)",
|
||||
.generic_driver = "gcm_base(ctr(sm4-generic),ghash-generic)",
|
||||
.test = alg_test_aead,
|
||||
.suite = {
|
||||
.aead = __VECS(sm4_gcm_tv_template)
|
||||
}
|
||||
}, {
|
||||
.alg = "ghash",
|
||||
.test = alg_test_hash,
|
||||
|
||||
148
crypto/testmgr.h
148
crypto/testmgr.h
@@ -13328,6 +13328,154 @@ static const struct cipher_testvec sm4_cfb_tv_template[] = {
|
||||
}
|
||||
};
|
||||
|
||||
/*
 * AEAD test vector for SM4 in GCM mode, referenced by the "gcm(sm4)"
 * entry of alg_test_descs[] via __VECS(sm4_gcm_tv_template).
 *
 * The single vector uses a 16-byte key, a 12-byte IV, 20 bytes of
 * associated data and 64 bytes of plaintext.
 */
static const struct aead_testvec sm4_gcm_tv_template[] = {
|
||||
{ /* From https://datatracker.ietf.org/doc/html/rfc8998#appendix-A.1 */
|
||||
.key = "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
|
||||
"\xFE\xDC\xBA\x98\x76\x54\x32\x10",
|
||||
.klen = 16,
|
||||
.iv = "\x00\x00\x12\x34\x56\x78\x00\x00"
|
||||
"\x00\x00\xAB\xCD",
|
||||
.ptext = "\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA"
|
||||
"\xBB\xBB\xBB\xBB\xBB\xBB\xBB\xBB"
|
||||
"\xCC\xCC\xCC\xCC\xCC\xCC\xCC\xCC"
|
||||
"\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD"
|
||||
"\xEE\xEE\xEE\xEE\xEE\xEE\xEE\xEE"
|
||||
"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF"
|
||||
"\xEE\xEE\xEE\xEE\xEE\xEE\xEE\xEE"
|
||||
"\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA",
|
||||
.plen = 64,
|
||||
.assoc = "\xFE\xED\xFA\xCE\xDE\xAD\xBE\xEF"
|
||||
"\xFE\xED\xFA\xCE\xDE\xAD\xBE\xEF"
|
||||
"\xAB\xAD\xDA\xD2",
|
||||
.alen = 20,
|
||||
.ctext = "\x17\xF3\x99\xF0\x8C\x67\xD5\xEE"
|
||||
"\x19\xD0\xDC\x99\x69\xC4\xBB\x7D"
|
||||
"\x5F\xD4\x6F\xD3\x75\x64\x89\x06"
|
||||
"\x91\x57\xB2\x82\xBB\x20\x07\x35"
|
||||
"\xD8\x27\x10\xCA\x5C\x22\xF0\xCC"
|
||||
"\xFA\x7C\xBF\x93\xD4\x96\xAC\x15"
|
||||
"\xA5\x68\x34\xCB\xCF\x98\xC3\x97"
|
||||
"\xB4\x02\x4A\x26\x91\x23\x3B\x8D"
|
||||
"\x83\xDE\x35\x41\xE4\xC2\xB5\x81"
|
||||
"\x77\xE0\x65\xA9\xBF\x7B\x62\xEC",
|
||||
/*
 * clen is plen (64) plus 16 trailing bytes; presumably the
 * authentication tag appended to the ciphertext -- confirm.
 */
.clen = 80,
|
||||
}
|
||||
};
|
||||
|
||||
/*
 * AEAD test vector for SM4 in CCM mode, referenced by the "ccm(sm4)"
 * entry of alg_test_descs[] via ____VECS(sm4_ccm_tv_template).
 *
 * The single vector uses a 16-byte key, a 16-byte IV field, 20 bytes of
 * associated data and 64 bytes of plaintext.
 */
static const struct aead_testvec sm4_ccm_tv_template[] = {
|
||||
{ /* From https://datatracker.ietf.org/doc/html/rfc8998#appendix-A.2 */
|
||||
.key = "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
|
||||
"\xFE\xDC\xBA\x98\x76\x54\x32\x10",
|
||||
.klen = 16,
|
||||
/*
 * Bytes 1..12 are 00 00 12 34 56 78 00 00 00 00 AB CD, framed by a
 * leading 0x02 and three trailing zero bytes.
 * NOTE(review): presumably a flags byte plus a zeroed counter field
 * around a 12-byte nonce -- confirm against the CCM implementation.
 */
.iv = "\x02\x00\x00\x12\x34\x56\x78\x00"
|
||||
"\x00\x00\x00\xAB\xCD\x00\x00\x00",
|
||||
.ptext = "\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA"
|
||||
"\xBB\xBB\xBB\xBB\xBB\xBB\xBB\xBB"
|
||||
"\xCC\xCC\xCC\xCC\xCC\xCC\xCC\xCC"
|
||||
"\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD"
|
||||
"\xEE\xEE\xEE\xEE\xEE\xEE\xEE\xEE"
|
||||
"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF"
|
||||
"\xEE\xEE\xEE\xEE\xEE\xEE\xEE\xEE"
|
||||
"\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA",
|
||||
.plen = 64,
|
||||
.assoc = "\xFE\xED\xFA\xCE\xDE\xAD\xBE\xEF"
|
||||
"\xFE\xED\xFA\xCE\xDE\xAD\xBE\xEF"
|
||||
"\xAB\xAD\xDA\xD2",
|
||||
.alen = 20,
|
||||
.ctext = "\x48\xAF\x93\x50\x1F\xA6\x2A\xDB"
|
||||
"\xCD\x41\x4C\xCE\x60\x34\xD8\x95"
|
||||
"\xDD\xA1\xBF\x8F\x13\x2F\x04\x20"
|
||||
"\x98\x66\x15\x72\xE7\x48\x30\x94"
|
||||
"\xFD\x12\xE5\x18\xCE\x06\x2C\x98"
|
||||
"\xAC\xEE\x28\xD9\x5D\xF4\x41\x6B"
|
||||
"\xED\x31\xA2\xF0\x44\x76\xC1\x8B"
|
||||
"\xB4\x0C\x84\xA7\x4B\x97\xDC\x5B"
|
||||
"\x16\x84\x2D\x4F\xA1\x86\xF5\x6A"
|
||||
"\xB3\x32\x56\x97\x1F\xA1\x10\xF4",
|
||||
/*
 * clen is plen (64) plus 16 trailing bytes; presumably the
 * authentication tag appended to the ciphertext -- confirm.
 */
.clen = 80,
|
||||
}
|
||||
};
|
||||
|
||||
/*
 * Keyed-hash test vectors for CBC-MAC over SM4, referenced by the
 * "cbcmac(sm4)" entry of alg_test_descs[] via
 * __VECS(sm4_cbcmac_tv_template).
 *
 * Three vectors, each with a 16-byte key and a 16-byte digest; the
 * message lengths are 16, 33 and 63 bytes (the latter two are not
 * multiples of 16).
 *
 * NOTE(review): no origin is cited for these vectors here -- confirm
 * where the expected digests were generated.
 */
static const struct hash_testvec sm4_cbcmac_tv_template[] = {
|
||||
{
|
||||
.key = "\xff\xee\xdd\xcc\xbb\xaa\x99\x88"
|
||||
"\x77\x66\x55\x44\x33\x22\x11\x00",
|
||||
.plaintext = "\x01\x23\x45\x67\x89\xab\xcd\xef"
|
||||
"\xfe\xdc\xba\x98\x76\x54\x32\x10",
|
||||
.digest = "\x97\xb4\x75\x8f\x84\x92\x3d\x3f"
|
||||
"\x86\x81\x0e\x0e\xea\x14\x6d\x73",
|
||||
.psize = 16,
|
||||
.ksize = 16,
|
||||
}, {
|
||||
.key = "\x01\x23\x45\x67\x89\xab\xcd\xef"
|
||||
"\xfe\xdc\xBA\x98\x76\x54\x32\x10",
|
||||
.plaintext = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
|
||||
"\xbb\xbb\xbb\xbb\xbb\xbb\xbb\xbb"
|
||||
"\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc"
|
||||
"\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
|
||||
"\xee",
|
||||
.digest = "\xc7\xdb\x17\x71\xa1\x5c\x0d\x22"
|
||||
"\xa3\x39\x3a\x31\x88\x91\x49\xa1",
|
||||
.psize = 33,
|
||||
.ksize = 16,
|
||||
}, {
|
||||
.key = "\x01\x23\x45\x67\x89\xab\xcd\xef"
|
||||
"\xfe\xdc\xBA\x98\x76\x54\x32\x10",
|
||||
.plaintext = "\xfb\xd1\xbe\x92\x7e\x50\x3f\x16"
|
||||
"\xf9\xdd\xbe\x91\x73\x53\x37\x1a"
|
||||
"\xfe\xdd\xba\x97\x7e\x53\x3c\x1c"
|
||||
"\xfe\xd7\xbf\x9c\x75\x5f\x3e\x11"
|
||||
"\xf0\xd8\xbc\x96\x73\x5c\x34\x11"
|
||||
"\xf5\xdb\xb1\x99\x7a\x5a\x32\x1f"
|
||||
"\xf6\xdf\xb4\x95\x7f\x5f\x3b\x17"
|
||||
"\xfd\xdb\xb1\x9b\x76\x5c\x37",
|
||||
.digest = "\x9b\x07\x88\x7f\xd5\x95\x23\x12"
|
||||
"\x64\x0a\x66\x7f\x4e\x25\xca\xd0",
|
||||
.psize = 63,
|
||||
.ksize = 16,
|
||||
}
|
||||
};
|
||||
|
||||
/*
 * Keyed-hash test vectors for CMAC over SM4, referenced by the
 * "cmac(sm4)" entry of alg_test_descs[] via
 * __VECS(sm4_cmac128_tv_template).
 *
 * Three vectors, each with a 16-byte key and a 16-byte digest; the
 * message lengths are 16, 33 and 63 bytes. The keys and plaintexts
 * are byte-for-byte the same as in sm4_cbcmac_tv_template; only the
 * expected digests differ.
 *
 * NOTE(review): no origin is cited for these vectors here -- confirm
 * where the expected digests were generated.
 */
static const struct hash_testvec sm4_cmac128_tv_template[] = {
|
||||
{
|
||||
.key = "\xff\xee\xdd\xcc\xbb\xaa\x99\x88"
|
||||
"\x77\x66\x55\x44\x33\x22\x11\x00",
|
||||
.plaintext = "\x01\x23\x45\x67\x89\xab\xcd\xef"
|
||||
"\xfe\xdc\xba\x98\x76\x54\x32\x10",
|
||||
.digest = "\x00\xd4\x63\xb4\x9a\xf3\x52\xe2"
|
||||
"\x74\xa9\x00\x55\x13\x54\x2a\xd1",
|
||||
.psize = 16,
|
||||
.ksize = 16,
|
||||
}, {
|
||||
.key = "\x01\x23\x45\x67\x89\xab\xcd\xef"
|
||||
"\xfe\xdc\xBA\x98\x76\x54\x32\x10",
|
||||
.plaintext = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
|
||||
"\xbb\xbb\xbb\xbb\xbb\xbb\xbb\xbb"
|
||||
"\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc"
|
||||
"\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
|
||||
"\xee",
|
||||
.digest = "\x8a\x8a\xe9\xc0\xc8\x97\x0e\x85"
|
||||
"\x21\x57\x02\x10\x1a\xbf\x9c\xc6",
|
||||
.psize = 33,
|
||||
.ksize = 16,
|
||||
}, {
|
||||
.key = "\x01\x23\x45\x67\x89\xab\xcd\xef"
|
||||
"\xfe\xdc\xBA\x98\x76\x54\x32\x10",
|
||||
.plaintext = "\xfb\xd1\xbe\x92\x7e\x50\x3f\x16"
|
||||
"\xf9\xdd\xbe\x91\x73\x53\x37\x1a"
|
||||
"\xfe\xdd\xba\x97\x7e\x53\x3c\x1c"
|
||||
"\xfe\xd7\xbf\x9c\x75\x5f\x3e\x11"
|
||||
"\xf0\xd8\xbc\x96\x73\x5c\x34\x11"
|
||||
"\xf5\xdb\xb1\x99\x7a\x5a\x32\x1f"
|
||||
"\xf6\xdf\xb4\x95\x7f\x5f\x3b\x17"
|
||||
"\xfd\xdb\xb1\x9b\x76\x5c\x37",
|
||||
.digest = "\x5f\x14\xc9\xa9\x20\xb2\xb4\xf0"
|
||||
"\x76\xe0\xd8\xd6\xdc\x4f\xe1\xbc",
|
||||
.psize = 63,
|
||||
.ksize = 16,
|
||||
}
|
||||
};
|
||||
|
||||
/* Cast6 test vectors from RFC 2612 */
|
||||
static const struct cipher_testvec cast6_tv_template[] = {
|
||||
{
|
||||
|
||||
@@ -775,7 +775,7 @@ static const u64 rc[WHIRLPOOL_ROUNDS] = {
|
||||
0xca2dbf07ad5a8333ULL,
|
||||
};
|
||||
|
||||
/**
|
||||
/*
|
||||
* The core Whirlpool transform.
|
||||
*/
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user