Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

Pull crypto updates from Herbert Xu:
 "Algorithms:

   - Add AES-NI/AVX/x86_64 implementation of SM4.

  Drivers:

   - Add Arm SMCCC TRNG based driver"

[ And obviously a lot of random fixes and updates  - Linus]

* 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (84 commits)
  crypto: sha512 - remove imaginary and mystifying clearing of variables
  crypto: aesni - xts_crypt() return if walk.nbytes is 0
  padata: Remove repeated verbose license text
  crypto: ccp - Add support for new CCP/PSP device ID
  crypto: x86/sm4 - add AES-NI/AVX2/x86_64 implementation
  crypto: x86/sm4 - export reusable AESNI/AVX functions
  crypto: rmd320 - remove rmd320 in Makefile
  crypto: skcipher - in_irq() cleanup
  crypto: hisilicon - check _PS0 and _PR0 method
  crypto: hisilicon - change parameter passing of debugfs function
  crypto: hisilicon - support runtime PM for accelerator device
  crypto: hisilicon - add runtime PM ops
  crypto: hisilicon - using 'debugfs_create_file' instead of 'debugfs_create_regset32'
  crypto: tcrypt - add GCM/CCM mode test for SM4 algorithm
  crypto: testmgr - Add GCM/CCM mode test of SM4 algorithm
  crypto: tcrypt - Fix missing return value check
  crypto: hisilicon/sec - modify the hardware endian configuration
  crypto: hisilicon/sec - fix the abnormal exiting process
  crypto: qat - store vf.compatible flag
  crypto: qat - do not export adf_iov_putmsg()
  ...
This commit is contained in:
Linus Torvalds
2021-08-30 12:57:10 -07:00
87 changed files with 3696 additions and 840 deletions

View File

@@ -112,7 +112,7 @@ static struct kpp_alg curve25519_alg = {
.max_size = curve25519_max_size,
};
static int __init mod_init(void)
static int __init arm_curve25519_init(void)
{
if (elf_hwcap & HWCAP_NEON) {
static_branch_enable(&have_neon);
@@ -122,14 +122,14 @@ static int __init mod_init(void)
return 0;
}
static void __exit mod_exit(void)
static void __exit arm_curve25519_exit(void)
{
if (IS_REACHABLE(CONFIG_CRYPTO_KPP) && elf_hwcap & HWCAP_NEON)
crypto_unregister_kpp(&curve25519_alg);
}
module_init(mod_init);
module_exit(mod_exit);
module_init(arm_curve25519_init);
module_exit(arm_curve25519_exit);
MODULE_ALIAS_CRYPTO("curve25519");
MODULE_ALIAS_CRYPTO("curve25519-neon");

View File

@@ -51,7 +51,7 @@ config CRYPTO_SM4_ARM64_CE
tristate "SM4 symmetric cipher (ARMv8.2 Crypto Extensions)"
depends on KERNEL_MODE_NEON
select CRYPTO_ALGAPI
select CRYPTO_SM4
select CRYPTO_LIB_SM4
config CRYPTO_GHASH_ARM64_CE
tristate "GHASH/AES-GCM using ARMv8 Crypto Extensions"

View File

@@ -17,12 +17,20 @@ MODULE_LICENSE("GPL v2");
asmlinkage void sm4_ce_do_crypt(const u32 *rk, void *out, const void *in);
static int sm4_ce_setkey(struct crypto_tfm *tfm, const u8 *key,
unsigned int key_len)
{
struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
return sm4_expandkey(ctx, key, key_len);
}
static void sm4_ce_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
if (!crypto_simd_usable()) {
crypto_sm4_encrypt(tfm, out, in);
sm4_crypt_block(ctx->rkey_enc, out, in);
} else {
kernel_neon_begin();
sm4_ce_do_crypt(ctx->rkey_enc, out, in);
@@ -32,10 +40,10 @@ static void sm4_ce_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
static void sm4_ce_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
if (!crypto_simd_usable()) {
crypto_sm4_decrypt(tfm, out, in);
sm4_crypt_block(ctx->rkey_dec, out, in);
} else {
kernel_neon_begin();
sm4_ce_do_crypt(ctx->rkey_dec, out, in);
@@ -49,12 +57,12 @@ static struct crypto_alg sm4_ce_alg = {
.cra_priority = 200,
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = SM4_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_sm4_ctx),
.cra_ctxsize = sizeof(struct sm4_ctx),
.cra_module = THIS_MODULE,
.cra_u.cipher = {
.cia_min_keysize = SM4_KEY_SIZE,
.cia_max_keysize = SM4_KEY_SIZE,
.cia_setkey = crypto_sm4_set_key,
.cia_setkey = sm4_ce_setkey,
.cia_encrypt = sm4_ce_encrypt,
.cia_decrypt = sm4_ce_decrypt
}

View File

@@ -88,6 +88,12 @@ nhpoly1305-avx2-y := nh-avx2-x86_64.o nhpoly1305-avx2-glue.o
obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o
obj-$(CONFIG_CRYPTO_SM4_AESNI_AVX_X86_64) += sm4-aesni-avx-x86_64.o
sm4-aesni-avx-x86_64-y := sm4-aesni-avx-asm_64.o sm4_aesni_avx_glue.o
obj-$(CONFIG_CRYPTO_SM4_AESNI_AVX2_X86_64) += sm4-aesni-avx2-x86_64.o
sm4-aesni-avx2-x86_64-y := sm4-aesni-avx2-asm_64.o sm4_aesni_avx2_glue.o
quiet_cmd_perlasm = PERLASM $@
cmd_perlasm = $(PERL) $< > $@
$(obj)/%.S: $(src)/%.pl FORCE

View File

@@ -849,6 +849,8 @@ static int xts_crypt(struct skcipher_request *req, bool encrypt)
return -EINVAL;
err = skcipher_walk_virt(&walk, req, false);
if (!walk.nbytes)
return err;
if (unlikely(tail > 0 && walk.nbytes < walk.total)) {
int blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2;
@@ -862,7 +864,10 @@ static int xts_crypt(struct skcipher_request *req, bool encrypt)
skcipher_request_set_crypt(&subreq, req->src, req->dst,
blocks * AES_BLOCK_SIZE, req->iv);
req = &subreq;
err = skcipher_walk_virt(&walk, req, false);
if (err)
return err;
} else {
tail = 0;
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,497 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* SM4 Cipher Algorithm, AES-NI/AVX2 optimized.
* as specified in
* https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
*
* Copyright (C) 2018 Markku-Juhani O. Saarinen <mjos@iki.fi>
* Copyright (C) 2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
* Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
*/
/* Based on SM4 AES-NI work by libgcrypt and Markku-Juhani O. Saarinen at:
* https://github.com/mjosaarinen/sm4ni
*/
#include <linux/linkage.h>
#include <asm/frame.h>
#define rRIP (%rip)
/* vector registers */
#define RX0 %ymm0
#define RX1 %ymm1
#define MASK_4BIT %ymm2
#define RTMP0 %ymm3
#define RTMP1 %ymm4
#define RTMP2 %ymm5
#define RTMP3 %ymm6
#define RTMP4 %ymm7
#define RA0 %ymm8
#define RA1 %ymm9
#define RA2 %ymm10
#define RA3 %ymm11
#define RB0 %ymm12
#define RB1 %ymm13
#define RB2 %ymm14
#define RB3 %ymm15
#define RNOT %ymm0
#define RBSWAP %ymm1
#define RX0x %xmm0
#define RX1x %xmm1
#define MASK_4BITx %xmm2
#define RNOTx %xmm0
#define RBSWAPx %xmm1
#define RTMP0x %xmm3
#define RTMP1x %xmm4
#define RTMP2x %xmm5
#define RTMP3x %xmm6
#define RTMP4x %xmm7
/* helper macros */
/* Transpose four 32-bit words between 128-bit vector lanes. */
#define transpose_4x4(x0, x1, x2, x3, t1, t2) \
vpunpckhdq x1, x0, t2; \
vpunpckldq x1, x0, x0; \
\
vpunpckldq x3, x2, t1; \
vpunpckhdq x3, x2, x2; \
\
vpunpckhqdq t1, x0, x1; \
vpunpcklqdq t1, x0, x0; \
\
vpunpckhqdq x2, t2, x3; \
vpunpcklqdq x2, t2, x2;
/* post-SubByte transform. */
#define transform_pre(x, lo_t, hi_t, mask4bit, tmp0) \
vpand x, mask4bit, tmp0; \
vpandn x, mask4bit, x; \
vpsrld $4, x, x; \
\
vpshufb tmp0, lo_t, tmp0; \
vpshufb x, hi_t, x; \
vpxor tmp0, x, x;
/* post-SubByte transform. Note: x has been XOR'ed with mask4bit by
* 'vaeslastenc' instruction. */
#define transform_post(x, lo_t, hi_t, mask4bit, tmp0) \
vpandn mask4bit, x, tmp0; \
vpsrld $4, x, x; \
vpand x, mask4bit, x; \
\
vpshufb tmp0, lo_t, tmp0; \
vpshufb x, hi_t, x; \
vpxor tmp0, x, x;
.section .rodata.cst164, "aM", @progbits, 164
.align 16
/*
* Following four affine transform look-up tables are from work by
* Markku-Juhani O. Saarinen, at https://github.com/mjosaarinen/sm4ni
*
* These allow exposing SM4 S-Box from AES SubByte.
*/
/* pre-SubByte affine transform, from SM4 field to AES field. */
.Lpre_tf_lo_s:
.quad 0x9197E2E474720701, 0xC7C1B4B222245157
.Lpre_tf_hi_s:
.quad 0xE240AB09EB49A200, 0xF052B91BF95BB012
/* post-SubByte affine transform, from AES field to SM4 field. */
.Lpost_tf_lo_s:
.quad 0x5B67F2CEA19D0834, 0xEDD14478172BBE82
.Lpost_tf_hi_s:
.quad 0xAE7201DD73AFDC00, 0x11CDBE62CC1063BF
/* For isolating SubBytes from AESENCLAST, inverse shift row */
.Linv_shift_row:
.byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b
.byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03
/* Inverse shift row + Rotate left by 8 bits on 32-bit words with vpshufb */
.Linv_shift_row_rol_8:
.byte 0x07, 0x00, 0x0d, 0x0a, 0x0b, 0x04, 0x01, 0x0e
.byte 0x0f, 0x08, 0x05, 0x02, 0x03, 0x0c, 0x09, 0x06
/* Inverse shift row + Rotate left by 16 bits on 32-bit words with vpshufb */
.Linv_shift_row_rol_16:
.byte 0x0a, 0x07, 0x00, 0x0d, 0x0e, 0x0b, 0x04, 0x01
.byte 0x02, 0x0f, 0x08, 0x05, 0x06, 0x03, 0x0c, 0x09
/* Inverse shift row + Rotate left by 24 bits on 32-bit words with vpshufb */
.Linv_shift_row_rol_24:
.byte 0x0d, 0x0a, 0x07, 0x00, 0x01, 0x0e, 0x0b, 0x04
.byte 0x05, 0x02, 0x0f, 0x08, 0x09, 0x06, 0x03, 0x0c
/* For CTR-mode IV byteswap */
.Lbswap128_mask:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
/* For input word byte-swap */
.Lbswap32_mask:
.byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
.align 4
/* 4-bit mask */
.L0f0f0f0f:
.long 0x0f0f0f0f
.text
.align 16
.align 8
SYM_FUNC_START_LOCAL(__sm4_crypt_blk16)
/* input:
* %rdi: round key array, CTX
* RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: sixteen parallel
* plaintext blocks
* output:
* RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: sixteen parallel
* ciphertext blocks
*/
FRAME_BEGIN
vbroadcasti128 .Lbswap32_mask rRIP, RTMP2;
vpshufb RTMP2, RA0, RA0;
vpshufb RTMP2, RA1, RA1;
vpshufb RTMP2, RA2, RA2;
vpshufb RTMP2, RA3, RA3;
vpshufb RTMP2, RB0, RB0;
vpshufb RTMP2, RB1, RB1;
vpshufb RTMP2, RB2, RB2;
vpshufb RTMP2, RB3, RB3;
vpbroadcastd .L0f0f0f0f rRIP, MASK_4BIT;
transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1);
transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1);
#define ROUND(round, s0, s1, s2, s3, r0, r1, r2, r3) \
vpbroadcastd (4*(round))(%rdi), RX0; \
vbroadcasti128 .Lpre_tf_lo_s rRIP, RTMP4; \
vbroadcasti128 .Lpre_tf_hi_s rRIP, RTMP1; \
vmovdqa RX0, RX1; \
vpxor s1, RX0, RX0; \
vpxor s2, RX0, RX0; \
vpxor s3, RX0, RX0; /* s1 ^ s2 ^ s3 ^ rk */ \
vbroadcasti128 .Lpost_tf_lo_s rRIP, RTMP2; \
vbroadcasti128 .Lpost_tf_hi_s rRIP, RTMP3; \
vpxor r1, RX1, RX1; \
vpxor r2, RX1, RX1; \
vpxor r3, RX1, RX1; /* r1 ^ r2 ^ r3 ^ rk */ \
\
/* sbox, non-linear part */ \
transform_pre(RX0, RTMP4, RTMP1, MASK_4BIT, RTMP0); \
transform_pre(RX1, RTMP4, RTMP1, MASK_4BIT, RTMP0); \
vextracti128 $1, RX0, RTMP4x; \
vextracti128 $1, RX1, RTMP0x; \
vaesenclast MASK_4BITx, RX0x, RX0x; \
vaesenclast MASK_4BITx, RTMP4x, RTMP4x; \
vaesenclast MASK_4BITx, RX1x, RX1x; \
vaesenclast MASK_4BITx, RTMP0x, RTMP0x; \
vinserti128 $1, RTMP4x, RX0, RX0; \
vbroadcasti128 .Linv_shift_row rRIP, RTMP4; \
vinserti128 $1, RTMP0x, RX1, RX1; \
transform_post(RX0, RTMP2, RTMP3, MASK_4BIT, RTMP0); \
transform_post(RX1, RTMP2, RTMP3, MASK_4BIT, RTMP0); \
\
/* linear part */ \
vpshufb RTMP4, RX0, RTMP0; \
vpxor RTMP0, s0, s0; /* s0 ^ x */ \
vpshufb RTMP4, RX1, RTMP2; \
vbroadcasti128 .Linv_shift_row_rol_8 rRIP, RTMP4; \
vpxor RTMP2, r0, r0; /* r0 ^ x */ \
vpshufb RTMP4, RX0, RTMP1; \
vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) */ \
vpshufb RTMP4, RX1, RTMP3; \
vbroadcasti128 .Linv_shift_row_rol_16 rRIP, RTMP4; \
vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) */ \
vpshufb RTMP4, RX0, RTMP1; \
vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) ^ rol(x,16) */ \
vpshufb RTMP4, RX1, RTMP3; \
vbroadcasti128 .Linv_shift_row_rol_24 rRIP, RTMP4; \
vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) ^ rol(x,16) */ \
vpshufb RTMP4, RX0, RTMP1; \
vpxor RTMP1, s0, s0; /* s0 ^ x ^ rol(x,24) */ \
vpslld $2, RTMP0, RTMP1; \
vpsrld $30, RTMP0, RTMP0; \
vpxor RTMP0, s0, s0; \
/* s0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \
vpxor RTMP1, s0, s0; \
vpshufb RTMP4, RX1, RTMP3; \
vpxor RTMP3, r0, r0; /* r0 ^ x ^ rol(x,24) */ \
vpslld $2, RTMP2, RTMP3; \
vpsrld $30, RTMP2, RTMP2; \
vpxor RTMP2, r0, r0; \
/* r0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \
vpxor RTMP3, r0, r0;
leaq (32*4)(%rdi), %rax;
.align 16
.Lroundloop_blk8:
ROUND(0, RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3);
ROUND(1, RA1, RA2, RA3, RA0, RB1, RB2, RB3, RB0);
ROUND(2, RA2, RA3, RA0, RA1, RB2, RB3, RB0, RB1);
ROUND(3, RA3, RA0, RA1, RA2, RB3, RB0, RB1, RB2);
leaq (4*4)(%rdi), %rdi;
cmpq %rax, %rdi;
jne .Lroundloop_blk8;
#undef ROUND
vbroadcasti128 .Lbswap128_mask rRIP, RTMP2;
transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1);
transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1);
vpshufb RTMP2, RA0, RA0;
vpshufb RTMP2, RA1, RA1;
vpshufb RTMP2, RA2, RA2;
vpshufb RTMP2, RA3, RA3;
vpshufb RTMP2, RB0, RB0;
vpshufb RTMP2, RB1, RB1;
vpshufb RTMP2, RB2, RB2;
vpshufb RTMP2, RB3, RB3;
FRAME_END
ret;
SYM_FUNC_END(__sm4_crypt_blk16)
#define inc_le128(x, minus_one, tmp) \
vpcmpeqq minus_one, x, tmp; \
vpsubq minus_one, x, x; \
vpslldq $8, tmp, tmp; \
vpsubq tmp, x, x;
/*
* void sm4_aesni_avx2_ctr_enc_blk16(const u32 *rk, u8 *dst,
* const u8 *src, u8 *iv)
*/
.align 8
SYM_FUNC_START(sm4_aesni_avx2_ctr_enc_blk16)
/* input:
* %rdi: round key array, CTX
* %rsi: dst (16 blocks)
* %rdx: src (16 blocks)
* %rcx: iv (big endian, 128bit)
*/
FRAME_BEGIN
movq 8(%rcx), %rax;
bswapq %rax;
vzeroupper;
vbroadcasti128 .Lbswap128_mask rRIP, RTMP3;
vpcmpeqd RNOT, RNOT, RNOT;
vpsrldq $8, RNOT, RNOT; /* ab: -1:0 ; cd: -1:0 */
vpaddq RNOT, RNOT, RTMP2; /* ab: -2:0 ; cd: -2:0 */
/* load IV and byteswap */
vmovdqu (%rcx), RTMP4x;
vpshufb RTMP3x, RTMP4x, RTMP4x;
vmovdqa RTMP4x, RTMP0x;
inc_le128(RTMP4x, RNOTx, RTMP1x);
vinserti128 $1, RTMP4x, RTMP0, RTMP0;
vpshufb RTMP3, RTMP0, RA0; /* +1 ; +0 */
/* check need for handling 64-bit overflow and carry */
cmpq $(0xffffffffffffffff - 16), %rax;
ja .Lhandle_ctr_carry;
/* construct IVs */
vpsubq RTMP2, RTMP0, RTMP0; /* +3 ; +2 */
vpshufb RTMP3, RTMP0, RA1;
vpsubq RTMP2, RTMP0, RTMP0; /* +5 ; +4 */
vpshufb RTMP3, RTMP0, RA2;
vpsubq RTMP2, RTMP0, RTMP0; /* +7 ; +6 */
vpshufb RTMP3, RTMP0, RA3;
vpsubq RTMP2, RTMP0, RTMP0; /* +9 ; +8 */
vpshufb RTMP3, RTMP0, RB0;
vpsubq RTMP2, RTMP0, RTMP0; /* +11 ; +10 */
vpshufb RTMP3, RTMP0, RB1;
vpsubq RTMP2, RTMP0, RTMP0; /* +13 ; +12 */
vpshufb RTMP3, RTMP0, RB2;
vpsubq RTMP2, RTMP0, RTMP0; /* +15 ; +14 */
vpshufb RTMP3, RTMP0, RB3;
vpsubq RTMP2, RTMP0, RTMP0; /* +16 */
vpshufb RTMP3x, RTMP0x, RTMP0x;
jmp .Lctr_carry_done;
.Lhandle_ctr_carry:
/* construct IVs */
inc_le128(RTMP0, RNOT, RTMP1);
inc_le128(RTMP0, RNOT, RTMP1);
vpshufb RTMP3, RTMP0, RA1; /* +3 ; +2 */
inc_le128(RTMP0, RNOT, RTMP1);
inc_le128(RTMP0, RNOT, RTMP1);
vpshufb RTMP3, RTMP0, RA2; /* +5 ; +4 */
inc_le128(RTMP0, RNOT, RTMP1);
inc_le128(RTMP0, RNOT, RTMP1);
vpshufb RTMP3, RTMP0, RA3; /* +7 ; +6 */
inc_le128(RTMP0, RNOT, RTMP1);
inc_le128(RTMP0, RNOT, RTMP1);
vpshufb RTMP3, RTMP0, RB0; /* +9 ; +8 */
inc_le128(RTMP0, RNOT, RTMP1);
inc_le128(RTMP0, RNOT, RTMP1);
vpshufb RTMP3, RTMP0, RB1; /* +11 ; +10 */
inc_le128(RTMP0, RNOT, RTMP1);
inc_le128(RTMP0, RNOT, RTMP1);
vpshufb RTMP3, RTMP0, RB2; /* +13 ; +12 */
inc_le128(RTMP0, RNOT, RTMP1);
inc_le128(RTMP0, RNOT, RTMP1);
vpshufb RTMP3, RTMP0, RB3; /* +15 ; +14 */
inc_le128(RTMP0, RNOT, RTMP1);
vextracti128 $1, RTMP0, RTMP0x;
vpshufb RTMP3x, RTMP0x, RTMP0x; /* +16 */
.align 4
.Lctr_carry_done:
/* store new IV */
vmovdqu RTMP0x, (%rcx);
call __sm4_crypt_blk16;
vpxor (0 * 32)(%rdx), RA0, RA0;
vpxor (1 * 32)(%rdx), RA1, RA1;
vpxor (2 * 32)(%rdx), RA2, RA2;
vpxor (3 * 32)(%rdx), RA3, RA3;
vpxor (4 * 32)(%rdx), RB0, RB0;
vpxor (5 * 32)(%rdx), RB1, RB1;
vpxor (6 * 32)(%rdx), RB2, RB2;
vpxor (7 * 32)(%rdx), RB3, RB3;
vmovdqu RA0, (0 * 32)(%rsi);
vmovdqu RA1, (1 * 32)(%rsi);
vmovdqu RA2, (2 * 32)(%rsi);
vmovdqu RA3, (3 * 32)(%rsi);
vmovdqu RB0, (4 * 32)(%rsi);
vmovdqu RB1, (5 * 32)(%rsi);
vmovdqu RB2, (6 * 32)(%rsi);
vmovdqu RB3, (7 * 32)(%rsi);
vzeroall;
FRAME_END
ret;
SYM_FUNC_END(sm4_aesni_avx2_ctr_enc_blk16)
/*
* void sm4_aesni_avx2_cbc_dec_blk16(const u32 *rk, u8 *dst,
* const u8 *src, u8 *iv)
*/
.align 8
SYM_FUNC_START(sm4_aesni_avx2_cbc_dec_blk16)
/* input:
* %rdi: round key array, CTX
* %rsi: dst (16 blocks)
* %rdx: src (16 blocks)
* %rcx: iv
*/
FRAME_BEGIN
vzeroupper;
vmovdqu (0 * 32)(%rdx), RA0;
vmovdqu (1 * 32)(%rdx), RA1;
vmovdqu (2 * 32)(%rdx), RA2;
vmovdqu (3 * 32)(%rdx), RA3;
vmovdqu (4 * 32)(%rdx), RB0;
vmovdqu (5 * 32)(%rdx), RB1;
vmovdqu (6 * 32)(%rdx), RB2;
vmovdqu (7 * 32)(%rdx), RB3;
call __sm4_crypt_blk16;
vmovdqu (%rcx), RNOTx;
vinserti128 $1, (%rdx), RNOT, RNOT;
vpxor RNOT, RA0, RA0;
vpxor (0 * 32 + 16)(%rdx), RA1, RA1;
vpxor (1 * 32 + 16)(%rdx), RA2, RA2;
vpxor (2 * 32 + 16)(%rdx), RA3, RA3;
vpxor (3 * 32 + 16)(%rdx), RB0, RB0;
vpxor (4 * 32 + 16)(%rdx), RB1, RB1;
vpxor (5 * 32 + 16)(%rdx), RB2, RB2;
vpxor (6 * 32 + 16)(%rdx), RB3, RB3;
vmovdqu (7 * 32 + 16)(%rdx), RNOTx;
vmovdqu RNOTx, (%rcx); /* store new IV */
vmovdqu RA0, (0 * 32)(%rsi);
vmovdqu RA1, (1 * 32)(%rsi);
vmovdqu RA2, (2 * 32)(%rsi);
vmovdqu RA3, (3 * 32)(%rsi);
vmovdqu RB0, (4 * 32)(%rsi);
vmovdqu RB1, (5 * 32)(%rsi);
vmovdqu RB2, (6 * 32)(%rsi);
vmovdqu RB3, (7 * 32)(%rsi);
vzeroall;
FRAME_END
ret;
SYM_FUNC_END(sm4_aesni_avx2_cbc_dec_blk16)
/*
* void sm4_aesni_avx2_cfb_dec_blk16(const u32 *rk, u8 *dst,
* const u8 *src, u8 *iv)
*/
.align 8
SYM_FUNC_START(sm4_aesni_avx2_cfb_dec_blk16)
/* input:
* %rdi: round key array, CTX
* %rsi: dst (16 blocks)
* %rdx: src (16 blocks)
* %rcx: iv
*/
FRAME_BEGIN
vzeroupper;
/* Load input */
vmovdqu (%rcx), RNOTx;
vinserti128 $1, (%rdx), RNOT, RA0;
vmovdqu (0 * 32 + 16)(%rdx), RA1;
vmovdqu (1 * 32 + 16)(%rdx), RA2;
vmovdqu (2 * 32 + 16)(%rdx), RA3;
vmovdqu (3 * 32 + 16)(%rdx), RB0;
vmovdqu (4 * 32 + 16)(%rdx), RB1;
vmovdqu (5 * 32 + 16)(%rdx), RB2;
vmovdqu (6 * 32 + 16)(%rdx), RB3;
/* Update IV */
vmovdqu (7 * 32 + 16)(%rdx), RNOTx;
vmovdqu RNOTx, (%rcx);
call __sm4_crypt_blk16;
vpxor (0 * 32)(%rdx), RA0, RA0;
vpxor (1 * 32)(%rdx), RA1, RA1;
vpxor (2 * 32)(%rdx), RA2, RA2;
vpxor (3 * 32)(%rdx), RA3, RA3;
vpxor (4 * 32)(%rdx), RB0, RB0;
vpxor (5 * 32)(%rdx), RB1, RB1;
vpxor (6 * 32)(%rdx), RB2, RB2;
vpxor (7 * 32)(%rdx), RB3, RB3;
vmovdqu RA0, (0 * 32)(%rsi);
vmovdqu RA1, (1 * 32)(%rsi);
vmovdqu RA2, (2 * 32)(%rsi);
vmovdqu RA3, (3 * 32)(%rsi);
vmovdqu RB0, (4 * 32)(%rsi);
vmovdqu RB1, (5 * 32)(%rsi);
vmovdqu RB2, (6 * 32)(%rsi);
vmovdqu RB3, (7 * 32)(%rsi);
vzeroall;
FRAME_END
ret;
SYM_FUNC_END(sm4_aesni_avx2_cfb_dec_blk16)

24
arch/x86/crypto/sm4-avx.h Normal file
View File

@@ -0,0 +1,24 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef ASM_X86_SM4_AVX_H
#define ASM_X86_SM4_AVX_H
#include <linux/types.h>
#include <crypto/sm4.h>
typedef void (*sm4_crypt_func)(const u32 *rk, u8 *dst, const u8 *src, u8 *iv);
int sm4_avx_ecb_encrypt(struct skcipher_request *req);
int sm4_avx_ecb_decrypt(struct skcipher_request *req);
int sm4_cbc_encrypt(struct skcipher_request *req);
int sm4_avx_cbc_decrypt(struct skcipher_request *req,
unsigned int bsize, sm4_crypt_func func);
int sm4_cfb_encrypt(struct skcipher_request *req);
int sm4_avx_cfb_decrypt(struct skcipher_request *req,
unsigned int bsize, sm4_crypt_func func);
int sm4_avx_ctr_crypt(struct skcipher_request *req,
unsigned int bsize, sm4_crypt_func func);
#endif

View File

@@ -0,0 +1,169 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* SM4 Cipher Algorithm, AES-NI/AVX2 optimized.
* as specified in
* https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
*
* Copyright (c) 2021, Alibaba Group.
* Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
*/
#include <linux/module.h>
#include <linux/crypto.h>
#include <linux/kernel.h>
#include <asm/simd.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <crypto/sm4.h>
#include "sm4-avx.h"
#define SM4_CRYPT16_BLOCK_SIZE (SM4_BLOCK_SIZE * 16)
asmlinkage void sm4_aesni_avx2_ctr_enc_blk16(const u32 *rk, u8 *dst,
const u8 *src, u8 *iv);
asmlinkage void sm4_aesni_avx2_cbc_dec_blk16(const u32 *rk, u8 *dst,
const u8 *src, u8 *iv);
asmlinkage void sm4_aesni_avx2_cfb_dec_blk16(const u32 *rk, u8 *dst,
const u8 *src, u8 *iv);
static int sm4_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
unsigned int key_len)
{
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
return sm4_expandkey(ctx, key, key_len);
}
static int cbc_decrypt(struct skcipher_request *req)
{
return sm4_avx_cbc_decrypt(req, SM4_CRYPT16_BLOCK_SIZE,
sm4_aesni_avx2_cbc_dec_blk16);
}
static int cfb_decrypt(struct skcipher_request *req)
{
return sm4_avx_cfb_decrypt(req, SM4_CRYPT16_BLOCK_SIZE,
sm4_aesni_avx2_cfb_dec_blk16);
}
static int ctr_crypt(struct skcipher_request *req)
{
return sm4_avx_ctr_crypt(req, SM4_CRYPT16_BLOCK_SIZE,
sm4_aesni_avx2_ctr_enc_blk16);
}
static struct skcipher_alg sm4_aesni_avx2_skciphers[] = {
{
.base = {
.cra_name = "__ecb(sm4)",
.cra_driver_name = "__ecb-sm4-aesni-avx2",
.cra_priority = 500,
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = SM4_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct sm4_ctx),
.cra_module = THIS_MODULE,
},
.min_keysize = SM4_KEY_SIZE,
.max_keysize = SM4_KEY_SIZE,
.walksize = 16 * SM4_BLOCK_SIZE,
.setkey = sm4_skcipher_setkey,
.encrypt = sm4_avx_ecb_encrypt,
.decrypt = sm4_avx_ecb_decrypt,
}, {
.base = {
.cra_name = "__cbc(sm4)",
.cra_driver_name = "__cbc-sm4-aesni-avx2",
.cra_priority = 500,
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = SM4_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct sm4_ctx),
.cra_module = THIS_MODULE,
},
.min_keysize = SM4_KEY_SIZE,
.max_keysize = SM4_KEY_SIZE,
.ivsize = SM4_BLOCK_SIZE,
.walksize = 16 * SM4_BLOCK_SIZE,
.setkey = sm4_skcipher_setkey,
.encrypt = sm4_cbc_encrypt,
.decrypt = cbc_decrypt,
}, {
.base = {
.cra_name = "__cfb(sm4)",
.cra_driver_name = "__cfb-sm4-aesni-avx2",
.cra_priority = 500,
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct sm4_ctx),
.cra_module = THIS_MODULE,
},
.min_keysize = SM4_KEY_SIZE,
.max_keysize = SM4_KEY_SIZE,
.ivsize = SM4_BLOCK_SIZE,
.chunksize = SM4_BLOCK_SIZE,
.walksize = 16 * SM4_BLOCK_SIZE,
.setkey = sm4_skcipher_setkey,
.encrypt = sm4_cfb_encrypt,
.decrypt = cfb_decrypt,
}, {
.base = {
.cra_name = "__ctr(sm4)",
.cra_driver_name = "__ctr-sm4-aesni-avx2",
.cra_priority = 500,
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct sm4_ctx),
.cra_module = THIS_MODULE,
},
.min_keysize = SM4_KEY_SIZE,
.max_keysize = SM4_KEY_SIZE,
.ivsize = SM4_BLOCK_SIZE,
.chunksize = SM4_BLOCK_SIZE,
.walksize = 16 * SM4_BLOCK_SIZE,
.setkey = sm4_skcipher_setkey,
.encrypt = ctr_crypt,
.decrypt = ctr_crypt,
}
};
static struct simd_skcipher_alg *
simd_sm4_aesni_avx2_skciphers[ARRAY_SIZE(sm4_aesni_avx2_skciphers)];
static int __init sm4_init(void)
{
const char *feature_name;
if (!boot_cpu_has(X86_FEATURE_AVX) ||
!boot_cpu_has(X86_FEATURE_AVX2) ||
!boot_cpu_has(X86_FEATURE_AES) ||
!boot_cpu_has(X86_FEATURE_OSXSAVE)) {
pr_info("AVX2 or AES-NI instructions are not detected.\n");
return -ENODEV;
}
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
&feature_name)) {
pr_info("CPU feature '%s' is not supported.\n", feature_name);
return -ENODEV;
}
return simd_register_skciphers_compat(sm4_aesni_avx2_skciphers,
ARRAY_SIZE(sm4_aesni_avx2_skciphers),
simd_sm4_aesni_avx2_skciphers);
}
static void __exit sm4_exit(void)
{
simd_unregister_skciphers(sm4_aesni_avx2_skciphers,
ARRAY_SIZE(sm4_aesni_avx2_skciphers),
simd_sm4_aesni_avx2_skciphers);
}
module_init(sm4_init);
module_exit(sm4_exit);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Tianjia Zhang <tianjia.zhang@linux.alibaba.com>");
MODULE_DESCRIPTION("SM4 Cipher Algorithm, AES-NI/AVX2 optimized");
MODULE_ALIAS_CRYPTO("sm4");
MODULE_ALIAS_CRYPTO("sm4-aesni-avx2");

View File

@@ -0,0 +1,487 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* SM4 Cipher Algorithm, AES-NI/AVX optimized.
* as specified in
* https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
*
* Copyright (c) 2021, Alibaba Group.
* Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
*/
#include <linux/module.h>
#include <linux/crypto.h>
#include <linux/kernel.h>
#include <asm/simd.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <crypto/sm4.h>
#include "sm4-avx.h"
#define SM4_CRYPT8_BLOCK_SIZE (SM4_BLOCK_SIZE * 8)
asmlinkage void sm4_aesni_avx_crypt4(const u32 *rk, u8 *dst,
const u8 *src, int nblocks);
asmlinkage void sm4_aesni_avx_crypt8(const u32 *rk, u8 *dst,
const u8 *src, int nblocks);
asmlinkage void sm4_aesni_avx_ctr_enc_blk8(const u32 *rk, u8 *dst,
const u8 *src, u8 *iv);
asmlinkage void sm4_aesni_avx_cbc_dec_blk8(const u32 *rk, u8 *dst,
const u8 *src, u8 *iv);
asmlinkage void sm4_aesni_avx_cfb_dec_blk8(const u32 *rk, u8 *dst,
const u8 *src, u8 *iv);
static int sm4_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
unsigned int key_len)
{
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
return sm4_expandkey(ctx, key, key_len);
}
static int ecb_do_crypt(struct skcipher_request *req, const u32 *rkey)
{
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes) > 0) {
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
kernel_fpu_begin();
while (nbytes >= SM4_CRYPT8_BLOCK_SIZE) {
sm4_aesni_avx_crypt8(rkey, dst, src, 8);
dst += SM4_CRYPT8_BLOCK_SIZE;
src += SM4_CRYPT8_BLOCK_SIZE;
nbytes -= SM4_CRYPT8_BLOCK_SIZE;
}
while (nbytes >= SM4_BLOCK_SIZE) {
unsigned int nblocks = min(nbytes >> 4, 4u);
sm4_aesni_avx_crypt4(rkey, dst, src, nblocks);
dst += nblocks * SM4_BLOCK_SIZE;
src += nblocks * SM4_BLOCK_SIZE;
nbytes -= nblocks * SM4_BLOCK_SIZE;
}
kernel_fpu_end();
err = skcipher_walk_done(&walk, nbytes);
}
return err;
}
int sm4_avx_ecb_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
return ecb_do_crypt(req, ctx->rkey_enc);
}
EXPORT_SYMBOL_GPL(sm4_avx_ecb_encrypt);
int sm4_avx_ecb_decrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
return ecb_do_crypt(req, ctx->rkey_dec);
}
EXPORT_SYMBOL_GPL(sm4_avx_ecb_decrypt);
int sm4_cbc_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes) > 0) {
const u8 *iv = walk.iv;
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
while (nbytes >= SM4_BLOCK_SIZE) {
crypto_xor_cpy(dst, src, iv, SM4_BLOCK_SIZE);
sm4_crypt_block(ctx->rkey_enc, dst, dst);
iv = dst;
src += SM4_BLOCK_SIZE;
dst += SM4_BLOCK_SIZE;
nbytes -= SM4_BLOCK_SIZE;
}
if (iv != walk.iv)
memcpy(walk.iv, iv, SM4_BLOCK_SIZE);
err = skcipher_walk_done(&walk, nbytes);
}
return err;
}
EXPORT_SYMBOL_GPL(sm4_cbc_encrypt);
int sm4_avx_cbc_decrypt(struct skcipher_request *req,
unsigned int bsize, sm4_crypt_func func)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes) > 0) {
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
kernel_fpu_begin();
while (nbytes >= bsize) {
func(ctx->rkey_dec, dst, src, walk.iv);
dst += bsize;
src += bsize;
nbytes -= bsize;
}
while (nbytes >= SM4_BLOCK_SIZE) {
u8 keystream[SM4_BLOCK_SIZE * 8];
u8 iv[SM4_BLOCK_SIZE];
unsigned int nblocks = min(nbytes >> 4, 8u);
int i;
sm4_aesni_avx_crypt8(ctx->rkey_dec, keystream,
src, nblocks);
src += ((int)nblocks - 2) * SM4_BLOCK_SIZE;
dst += (nblocks - 1) * SM4_BLOCK_SIZE;
memcpy(iv, src + SM4_BLOCK_SIZE, SM4_BLOCK_SIZE);
for (i = nblocks - 1; i > 0; i--) {
crypto_xor_cpy(dst, src,
&keystream[i * SM4_BLOCK_SIZE],
SM4_BLOCK_SIZE);
src -= SM4_BLOCK_SIZE;
dst -= SM4_BLOCK_SIZE;
}
crypto_xor_cpy(dst, walk.iv, keystream, SM4_BLOCK_SIZE);
memcpy(walk.iv, iv, SM4_BLOCK_SIZE);
dst += nblocks * SM4_BLOCK_SIZE;
src += (nblocks + 1) * SM4_BLOCK_SIZE;
nbytes -= nblocks * SM4_BLOCK_SIZE;
}
kernel_fpu_end();
err = skcipher_walk_done(&walk, nbytes);
}
return err;
}
EXPORT_SYMBOL_GPL(sm4_avx_cbc_decrypt);
static int cbc_decrypt(struct skcipher_request *req)
{
return sm4_avx_cbc_decrypt(req, SM4_CRYPT8_BLOCK_SIZE,
sm4_aesni_avx_cbc_dec_blk8);
}
int sm4_cfb_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes) > 0) {
u8 keystream[SM4_BLOCK_SIZE];
const u8 *iv = walk.iv;
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
while (nbytes >= SM4_BLOCK_SIZE) {
sm4_crypt_block(ctx->rkey_enc, keystream, iv);
crypto_xor_cpy(dst, src, keystream, SM4_BLOCK_SIZE);
iv = dst;
src += SM4_BLOCK_SIZE;
dst += SM4_BLOCK_SIZE;
nbytes -= SM4_BLOCK_SIZE;
}
if (iv != walk.iv)
memcpy(walk.iv, iv, SM4_BLOCK_SIZE);
/* tail */
if (walk.nbytes == walk.total && nbytes > 0) {
sm4_crypt_block(ctx->rkey_enc, keystream, walk.iv);
crypto_xor_cpy(dst, src, keystream, nbytes);
nbytes = 0;
}
err = skcipher_walk_done(&walk, nbytes);
}
return err;
}
EXPORT_SYMBOL_GPL(sm4_cfb_encrypt);
int sm4_avx_cfb_decrypt(struct skcipher_request *req,
unsigned int bsize, sm4_crypt_func func)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes) > 0) {
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
kernel_fpu_begin();
while (nbytes >= bsize) {
func(ctx->rkey_enc, dst, src, walk.iv);
dst += bsize;
src += bsize;
nbytes -= bsize;
}
while (nbytes >= SM4_BLOCK_SIZE) {
u8 keystream[SM4_BLOCK_SIZE * 8];
unsigned int nblocks = min(nbytes >> 4, 8u);
memcpy(keystream, walk.iv, SM4_BLOCK_SIZE);
if (nblocks > 1)
memcpy(&keystream[SM4_BLOCK_SIZE], src,
(nblocks - 1) * SM4_BLOCK_SIZE);
memcpy(walk.iv, src + (nblocks - 1) * SM4_BLOCK_SIZE,
SM4_BLOCK_SIZE);
sm4_aesni_avx_crypt8(ctx->rkey_enc, keystream,
keystream, nblocks);
crypto_xor_cpy(dst, src, keystream,
nblocks * SM4_BLOCK_SIZE);
dst += nblocks * SM4_BLOCK_SIZE;
src += nblocks * SM4_BLOCK_SIZE;
nbytes -= nblocks * SM4_BLOCK_SIZE;
}
kernel_fpu_end();
/* tail */
if (walk.nbytes == walk.total && nbytes > 0) {
u8 keystream[SM4_BLOCK_SIZE];
sm4_crypt_block(ctx->rkey_enc, keystream, walk.iv);
crypto_xor_cpy(dst, src, keystream, nbytes);
nbytes = 0;
}
err = skcipher_walk_done(&walk, nbytes);
}
return err;
}
EXPORT_SYMBOL_GPL(sm4_avx_cfb_decrypt);
static int cfb_decrypt(struct skcipher_request *req)
{
return sm4_avx_cfb_decrypt(req, SM4_CRYPT8_BLOCK_SIZE,
sm4_aesni_avx_cfb_dec_blk8);
}
int sm4_avx_ctr_crypt(struct skcipher_request *req,
unsigned int bsize, sm4_crypt_func func)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes) > 0) {
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
kernel_fpu_begin();
while (nbytes >= bsize) {
func(ctx->rkey_enc, dst, src, walk.iv);
dst += bsize;
src += bsize;
nbytes -= bsize;
}
while (nbytes >= SM4_BLOCK_SIZE) {
u8 keystream[SM4_BLOCK_SIZE * 8];
unsigned int nblocks = min(nbytes >> 4, 8u);
int i;
for (i = 0; i < nblocks; i++) {
memcpy(&keystream[i * SM4_BLOCK_SIZE],
walk.iv, SM4_BLOCK_SIZE);
crypto_inc(walk.iv, SM4_BLOCK_SIZE);
}
sm4_aesni_avx_crypt8(ctx->rkey_enc, keystream,
keystream, nblocks);
crypto_xor_cpy(dst, src, keystream,
nblocks * SM4_BLOCK_SIZE);
dst += nblocks * SM4_BLOCK_SIZE;
src += nblocks * SM4_BLOCK_SIZE;
nbytes -= nblocks * SM4_BLOCK_SIZE;
}
kernel_fpu_end();
/* tail */
if (walk.nbytes == walk.total && nbytes > 0) {
u8 keystream[SM4_BLOCK_SIZE];
memcpy(keystream, walk.iv, SM4_BLOCK_SIZE);
crypto_inc(walk.iv, SM4_BLOCK_SIZE);
sm4_crypt_block(ctx->rkey_enc, keystream, keystream);
crypto_xor_cpy(dst, src, keystream, nbytes);
dst += nbytes;
src += nbytes;
nbytes = 0;
}
err = skcipher_walk_done(&walk, nbytes);
}
return err;
}
EXPORT_SYMBOL_GPL(sm4_avx_ctr_crypt);
static int ctr_crypt(struct skcipher_request *req)
{
return sm4_avx_ctr_crypt(req, SM4_CRYPT8_BLOCK_SIZE,
sm4_aesni_avx_ctr_enc_blk8);
}
static struct skcipher_alg sm4_aesni_avx_skciphers[] = {
{
.base = {
.cra_name = "__ecb(sm4)",
.cra_driver_name = "__ecb-sm4-aesni-avx",
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = SM4_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct sm4_ctx),
.cra_module = THIS_MODULE,
},
.min_keysize = SM4_KEY_SIZE,
.max_keysize = SM4_KEY_SIZE,
.walksize = 8 * SM4_BLOCK_SIZE,
.setkey = sm4_skcipher_setkey,
.encrypt = sm4_avx_ecb_encrypt,
.decrypt = sm4_avx_ecb_decrypt,
}, {
.base = {
.cra_name = "__cbc(sm4)",
.cra_driver_name = "__cbc-sm4-aesni-avx",
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = SM4_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct sm4_ctx),
.cra_module = THIS_MODULE,
},
.min_keysize = SM4_KEY_SIZE,
.max_keysize = SM4_KEY_SIZE,
.ivsize = SM4_BLOCK_SIZE,
.walksize = 8 * SM4_BLOCK_SIZE,
.setkey = sm4_skcipher_setkey,
.encrypt = sm4_cbc_encrypt,
.decrypt = cbc_decrypt,
}, {
.base = {
.cra_name = "__cfb(sm4)",
.cra_driver_name = "__cfb-sm4-aesni-avx",
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct sm4_ctx),
.cra_module = THIS_MODULE,
},
.min_keysize = SM4_KEY_SIZE,
.max_keysize = SM4_KEY_SIZE,
.ivsize = SM4_BLOCK_SIZE,
.chunksize = SM4_BLOCK_SIZE,
.walksize = 8 * SM4_BLOCK_SIZE,
.setkey = sm4_skcipher_setkey,
.encrypt = sm4_cfb_encrypt,
.decrypt = cfb_decrypt,
}, {
.base = {
.cra_name = "__ctr(sm4)",
.cra_driver_name = "__ctr-sm4-aesni-avx",
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct sm4_ctx),
.cra_module = THIS_MODULE,
},
.min_keysize = SM4_KEY_SIZE,
.max_keysize = SM4_KEY_SIZE,
.ivsize = SM4_BLOCK_SIZE,
.chunksize = SM4_BLOCK_SIZE,
.walksize = 8 * SM4_BLOCK_SIZE,
.setkey = sm4_skcipher_setkey,
.encrypt = ctr_crypt,
.decrypt = ctr_crypt,
}
};
static struct simd_skcipher_alg *
simd_sm4_aesni_avx_skciphers[ARRAY_SIZE(sm4_aesni_avx_skciphers)];
static int __init sm4_init(void)
{
const char *feature_name;
if (!boot_cpu_has(X86_FEATURE_AVX) ||
!boot_cpu_has(X86_FEATURE_AES) ||
!boot_cpu_has(X86_FEATURE_OSXSAVE)) {
pr_info("AVX or AES-NI instructions are not detected.\n");
return -ENODEV;
}
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
&feature_name)) {
pr_info("CPU feature '%s' is not supported.\n", feature_name);
return -ENODEV;
}
return simd_register_skciphers_compat(sm4_aesni_avx_skciphers,
ARRAY_SIZE(sm4_aesni_avx_skciphers),
simd_sm4_aesni_avx_skciphers);
}
static void __exit sm4_exit(void)
{
simd_unregister_skciphers(sm4_aesni_avx_skciphers,
ARRAY_SIZE(sm4_aesni_avx_skciphers),
simd_sm4_aesni_avx_skciphers);
}
module_init(sm4_init);
module_exit(sm4_exit);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Tianjia Zhang <tianjia.zhang@linux.alibaba.com>");
MODULE_DESCRIPTION("SM4 Cipher Algorithm, AES-NI/AVX optimized");
MODULE_ALIAS_CRYPTO("sm4");
MODULE_ALIAS_CRYPTO("sm4-aesni-avx");

View File

@@ -1547,6 +1547,7 @@ config CRYPTO_SERPENT_AVX2_X86_64
config CRYPTO_SM4
tristate "SM4 cipher algorithm"
select CRYPTO_ALGAPI
select CRYPTO_LIB_SM4
help
SM4 cipher algorithms (OSCCA GB/T 32907-2016).
@@ -1569,6 +1570,49 @@ config CRYPTO_SM4
If unsure, say N.
config CRYPTO_SM4_AESNI_AVX_X86_64
tristate "SM4 cipher algorithm (x86_64/AES-NI/AVX)"
depends on X86 && 64BIT
select CRYPTO_SKCIPHER
select CRYPTO_SIMD
select CRYPTO_ALGAPI
select CRYPTO_LIB_SM4
help
SM4 cipher algorithms (OSCCA GB/T 32907-2016) (x86_64/AES-NI/AVX).
SM4 (GBT.32907-2016) is a cryptographic standard issued by the
Organization of State Commercial Administration of China (OSCCA)
as an authorized cryptographic algorithms for the use within China.
This is SM4 optimized implementation using AES-NI/AVX/x86_64
instruction set for block cipher. Through two affine transforms,
we can use the AES S-Box to simulate the SM4 S-Box to achieve the
effect of instruction acceleration.
If unsure, say N.
config CRYPTO_SM4_AESNI_AVX2_X86_64
tristate "SM4 cipher algorithm (x86_64/AES-NI/AVX2)"
depends on X86 && 64BIT
select CRYPTO_SKCIPHER
select CRYPTO_SIMD
select CRYPTO_ALGAPI
select CRYPTO_LIB_SM4
select CRYPTO_SM4_AESNI_AVX_X86_64
help
SM4 cipher algorithms (OSCCA GB/T 32907-2016) (x86_64/AES-NI/AVX2).
SM4 (GBT.32907-2016) is a cryptographic standard issued by the
Organization of State Commercial Administration of China (OSCCA)
as an authorized cryptographic algorithms for the use within China.
This is SM4 optimized implementation using AES-NI/AVX2/x86_64
instruction set for block cipher. Through two affine transforms,
we can use the AES S-Box to simulate the SM4 S-Box to achieve the
effect of instruction acceleration.
If unsure, say N.
config CRYPTO_TEA
tristate "TEA, XTEA and XETA cipher algorithms"
depends on CRYPTO_USER_API_ENABLE_OBSOLETE

View File

@@ -74,7 +74,6 @@ obj-$(CONFIG_CRYPTO_NULL2) += crypto_null.o
obj-$(CONFIG_CRYPTO_MD4) += md4.o
obj-$(CONFIG_CRYPTO_MD5) += md5.o
obj-$(CONFIG_CRYPTO_RMD160) += rmd160.o
obj-$(CONFIG_CRYPTO_RMD320) += rmd320.o
obj-$(CONFIG_CRYPTO_SHA1) += sha1_generic.o
obj-$(CONFIG_CRYPTO_SHA256) += sha256_generic.o
obj-$(CONFIG_CRYPTO_SHA512) += sha512_generic.o

View File

@@ -27,6 +27,7 @@
#define _CRYPTO_ECC_H
#include <crypto/ecc_curve.h>
#include <asm/unaligned.h>
/* One digit is u64 qword. */
#define ECC_CURVE_NIST_P192_DIGITS 3
@@ -46,13 +47,13 @@
* @out: Output array
* @ndigits: Number of digits to copy
*/
static inline void ecc_swap_digits(const u64 *in, u64 *out, unsigned int ndigits)
static inline void ecc_swap_digits(const void *in, u64 *out, unsigned int ndigits)
{
const __be64 *src = (__force __be64 *)in;
int i;
for (i = 0; i < ndigits; i++)
out[i] = be64_to_cpu(src[ndigits - 1 - i]);
out[i] = get_unaligned_be64(&src[ndigits - 1 - i]);
}
/**

View File

@@ -143,9 +143,6 @@ sha512_transform(u64 *state, const u8 *input)
state[0] += a; state[1] += b; state[2] += c; state[3] += d;
state[4] += e; state[5] += f; state[6] += g; state[7] += h;
/* erase our data */
a = b = c = d = e = f = g = h = t1 = t2 = 0;
}
static void sha512_generic_block_fn(struct sha512_state *sst, u8 const *src,

View File

@@ -431,7 +431,7 @@ static int skcipher_copy_iv(struct skcipher_walk *walk)
static int skcipher_walk_first(struct skcipher_walk *walk)
{
if (WARN_ON_ONCE(in_irq()))
if (WARN_ON_ONCE(in_hardirq()))
return -EDEADLK;
walk->buffer = NULL;

View File

@@ -16,191 +16,43 @@
#include <asm/byteorder.h>
#include <asm/unaligned.h>
static const u32 fk[4] = {
0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc
};
static const u8 sbox[256] = {
0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7,
0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05,
0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3,
0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a,
0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62,
0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95,
0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6,
0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba,
0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8,
0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b,
0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35,
0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2,
0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87,
0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52,
0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e,
0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5,
0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1,
0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55,
0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3,
0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60,
0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f,
0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f,
0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51,
0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f,
0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8,
0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd,
0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0,
0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e,
0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84,
0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20,
0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48
};
static const u32 ck[] = {
0x00070e15, 0x1c232a31, 0x383f464d, 0x545b6269,
0x70777e85, 0x8c939aa1, 0xa8afb6bd, 0xc4cbd2d9,
0xe0e7eef5, 0xfc030a11, 0x181f262d, 0x343b4249,
0x50575e65, 0x6c737a81, 0x888f969d, 0xa4abb2b9,
0xc0c7ced5, 0xdce3eaf1, 0xf8ff060d, 0x141b2229,
0x30373e45, 0x4c535a61, 0x686f767d, 0x848b9299,
0xa0a7aeb5, 0xbcc3cad1, 0xd8dfe6ed, 0xf4fb0209,
0x10171e25, 0x2c333a41, 0x484f565d, 0x646b7279
};
static u32 sm4_t_non_lin_sub(u32 x)
{
int i;
u8 *b = (u8 *)&x;
for (i = 0; i < 4; ++i)
b[i] = sbox[b[i]];
return x;
}
static u32 sm4_key_lin_sub(u32 x)
{
return x ^ rol32(x, 13) ^ rol32(x, 23);
}
static u32 sm4_enc_lin_sub(u32 x)
{
return x ^ rol32(x, 2) ^ rol32(x, 10) ^ rol32(x, 18) ^ rol32(x, 24);
}
static u32 sm4_key_sub(u32 x)
{
return sm4_key_lin_sub(sm4_t_non_lin_sub(x));
}
static u32 sm4_enc_sub(u32 x)
{
return sm4_enc_lin_sub(sm4_t_non_lin_sub(x));
}
static u32 sm4_round(const u32 *x, const u32 rk)
{
return x[0] ^ sm4_enc_sub(x[1] ^ x[2] ^ x[3] ^ rk);
}
/**
* crypto_sm4_expand_key - Expands the SM4 key as described in GB/T 32907-2016
* @ctx: The location where the computed key will be stored.
* @in_key: The supplied key.
* @key_len: The length of the supplied key.
*
* Returns 0 on success. The function fails only if an invalid key size (or
* pointer) is supplied.
*/
int crypto_sm4_expand_key(struct crypto_sm4_ctx *ctx, const u8 *in_key,
unsigned int key_len)
{
u32 rk[4], t;
const u32 *key = (u32 *)in_key;
int i;
if (key_len != SM4_KEY_SIZE)
return -EINVAL;
for (i = 0; i < 4; ++i)
rk[i] = get_unaligned_be32(&key[i]) ^ fk[i];
for (i = 0; i < 32; ++i) {
t = rk[0] ^ sm4_key_sub(rk[1] ^ rk[2] ^ rk[3] ^ ck[i]);
ctx->rkey_enc[i] = t;
rk[0] = rk[1];
rk[1] = rk[2];
rk[2] = rk[3];
rk[3] = t;
}
for (i = 0; i < 32; ++i)
ctx->rkey_dec[i] = ctx->rkey_enc[31 - i];
return 0;
}
EXPORT_SYMBOL_GPL(crypto_sm4_expand_key);
/**
* crypto_sm4_set_key - Set the SM4 key.
* sm4_setkey - Set the SM4 key.
* @tfm: The %crypto_tfm that is used in the context.
* @in_key: The input key.
* @key_len: The size of the key.
*
* This function uses crypto_sm4_expand_key() to expand the key.
* &crypto_sm4_ctx _must_ be the private data embedded in @tfm which is
* This function uses sm4_expandkey() to expand the key.
* &sm4_ctx _must_ be the private data embedded in @tfm which is
* retrieved with crypto_tfm_ctx().
*
* Return: 0 on success; -EINVAL on failure (only happens for bad key lengths)
*/
int crypto_sm4_set_key(struct crypto_tfm *tfm, const u8 *in_key,
static int sm4_setkey(struct crypto_tfm *tfm, const u8 *in_key,
unsigned int key_len)
{
struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
return crypto_sm4_expand_key(ctx, in_key, key_len);
}
EXPORT_SYMBOL_GPL(crypto_sm4_set_key);
static void sm4_do_crypt(const u32 *rk, u32 *out, const u32 *in)
{
u32 x[4], i, t;
for (i = 0; i < 4; ++i)
x[i] = get_unaligned_be32(&in[i]);
for (i = 0; i < 32; ++i) {
t = sm4_round(x, rk[i]);
x[0] = x[1];
x[1] = x[2];
x[2] = x[3];
x[3] = t;
}
for (i = 0; i < 4; ++i)
put_unaligned_be32(x[3 - i], &out[i]);
return sm4_expandkey(ctx, in_key, key_len);
}
/* encrypt a block of text */
void crypto_sm4_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
static void sm4_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
sm4_do_crypt(ctx->rkey_enc, (u32 *)out, (u32 *)in);
sm4_crypt_block(ctx->rkey_enc, out, in);
}
EXPORT_SYMBOL_GPL(crypto_sm4_encrypt);
/* decrypt a block of text */
void crypto_sm4_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
static void sm4_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
sm4_do_crypt(ctx->rkey_dec, (u32 *)out, (u32 *)in);
sm4_crypt_block(ctx->rkey_dec, out, in);
}
EXPORT_SYMBOL_GPL(crypto_sm4_decrypt);
static struct crypto_alg sm4_alg = {
.cra_name = "sm4",
@@ -208,15 +60,15 @@ static struct crypto_alg sm4_alg = {
.cra_priority = 100,
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = SM4_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_sm4_ctx),
.cra_ctxsize = sizeof(struct sm4_ctx),
.cra_module = THIS_MODULE,
.cra_u = {
.cipher = {
.cia_min_keysize = SM4_KEY_SIZE,
.cia_max_keysize = SM4_KEY_SIZE,
.cia_setkey = crypto_sm4_set_key,
.cia_encrypt = crypto_sm4_encrypt,
.cia_decrypt = crypto_sm4_decrypt
.cia_setkey = sm4_setkey,
.cia_encrypt = sm4_encrypt,
.cia_decrypt = sm4_decrypt
}
}
};

View File

@@ -77,7 +77,7 @@ static const char *check[] = {
NULL
};
static const int block_sizes[] = { 16, 64, 256, 1024, 1420, 4096, 0 };
static const int block_sizes[] = { 16, 64, 128, 256, 1024, 1420, 4096, 0 };
static const int aead_sizes[] = { 16, 64, 256, 512, 1024, 1420, 4096, 8192, 0 };
#define XBUFSIZE 8
@@ -290,6 +290,11 @@ static void test_mb_aead_speed(const char *algo, int enc, int secs,
}
ret = crypto_aead_setauthsize(tfm, authsize);
if (ret) {
pr_err("alg: aead: Failed to setauthsize for %s: %d\n", algo,
ret);
goto out_free_tfm;
}
for (i = 0; i < num_mb; ++i)
if (testmgr_alloc_buf(data[i].xbuf)) {
@@ -315,7 +320,7 @@ static void test_mb_aead_speed(const char *algo, int enc, int secs,
for (i = 0; i < num_mb; ++i) {
data[i].req = aead_request_alloc(tfm, GFP_KERNEL);
if (!data[i].req) {
pr_err("alg: skcipher: Failed to allocate request for %s\n",
pr_err("alg: aead: Failed to allocate request for %s\n",
algo);
while (i--)
aead_request_free(data[i].req);
@@ -567,13 +572,19 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs,
sgout = &sg[9];
tfm = crypto_alloc_aead(algo, 0, 0);
if (IS_ERR(tfm)) {
pr_err("alg: aead: Failed to load transform for %s: %ld\n", algo,
PTR_ERR(tfm));
goto out_notfm;
}
ret = crypto_aead_setauthsize(tfm, authsize);
if (ret) {
pr_err("alg: aead: Failed to setauthsize for %s: %d\n", algo,
ret);
goto out_noreq;
}
crypto_init_wait(&wait);
printk(KERN_INFO "\ntesting speed of %s (%s) %s\n", algo,
get_driver_name(crypto_aead, tfm), e);
@@ -611,8 +622,13 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs,
break;
}
}
ret = crypto_aead_setkey(tfm, key, *keysize);
ret = crypto_aead_setauthsize(tfm, authsize);
if (ret) {
pr_err("setkey() failed flags=%x: %d\n",
crypto_aead_get_flags(tfm), ret);
goto out;
}
iv_len = crypto_aead_ivsize(tfm);
if (iv_len)
@@ -622,15 +638,8 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs,
printk(KERN_INFO "test %u (%d bit key, %d byte blocks): ",
i, *keysize * 8, bs);
memset(tvmem[0], 0xff, PAGE_SIZE);
if (ret) {
pr_err("setkey() failed flags=%x\n",
crypto_aead_get_flags(tfm));
goto out;
}
sg_init_aead(sg, xbuf, bs + (enc ? 0 : authsize),
assoc, aad_size);
@@ -1907,6 +1916,14 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
ret += tcrypt_test("streebog512");
break;
case 55:
ret += tcrypt_test("gcm(sm4)");
break;
case 56:
ret += tcrypt_test("ccm(sm4)");
break;
case 100:
ret += tcrypt_test("hmac(md5)");
break;
@@ -1998,6 +2015,15 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
case 157:
ret += tcrypt_test("authenc(hmac(sha1),ecb(cipher_null))");
break;
case 158:
ret += tcrypt_test("cbcmac(sm4)");
break;
case 159:
ret += tcrypt_test("cmac(sm4)");
break;
case 181:
ret += tcrypt_test("authenc(hmac(sha1),cbc(des))");
break;
@@ -2031,6 +2057,7 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
case 191:
ret += tcrypt_test("ecb(sm4)");
ret += tcrypt_test("cbc(sm4)");
ret += tcrypt_test("cfb(sm4)");
ret += tcrypt_test("ctr(sm4)");
break;
case 200:
@@ -2289,6 +2316,10 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
speed_template_16);
test_cipher_speed("cbc(sm4)", DECRYPT, sec, NULL, 0,
speed_template_16);
test_cipher_speed("cfb(sm4)", ENCRYPT, sec, NULL, 0,
speed_template_16);
test_cipher_speed("cfb(sm4)", DECRYPT, sec, NULL, 0,
speed_template_16);
test_cipher_speed("ctr(sm4)", ENCRYPT, sec, NULL, 0,
speed_template_16);
test_cipher_speed("ctr(sm4)", DECRYPT, sec, NULL, 0,
@@ -2322,6 +2353,34 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
NULL, 0, 16, 8, speed_template_16);
break;
case 222:
test_aead_speed("gcm(sm4)", ENCRYPT, sec,
NULL, 0, 16, 8, speed_template_16);
test_aead_speed("gcm(sm4)", DECRYPT, sec,
NULL, 0, 16, 8, speed_template_16);
break;
case 223:
test_aead_speed("rfc4309(ccm(sm4))", ENCRYPT, sec,
NULL, 0, 16, 16, aead_speed_template_19);
test_aead_speed("rfc4309(ccm(sm4))", DECRYPT, sec,
NULL, 0, 16, 16, aead_speed_template_19);
break;
case 224:
test_mb_aead_speed("gcm(sm4)", ENCRYPT, sec, NULL, 0, 16, 8,
speed_template_16, num_mb);
test_mb_aead_speed("gcm(sm4)", DECRYPT, sec, NULL, 0, 16, 8,
speed_template_16, num_mb);
break;
case 225:
test_mb_aead_speed("rfc4309(ccm(sm4))", ENCRYPT, sec, NULL, 0,
16, 16, aead_speed_template_19, num_mb);
test_mb_aead_speed("rfc4309(ccm(sm4))", DECRYPT, sec, NULL, 0,
16, 16, aead_speed_template_19, num_mb);
break;
case 300:
if (alg) {
test_hash_speed(alg, sec, generic_hash_speed_template);
@@ -2757,6 +2816,25 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
speed_template_8_32);
break;
case 518:
test_acipher_speed("ecb(sm4)", ENCRYPT, sec, NULL, 0,
speed_template_16);
test_acipher_speed("ecb(sm4)", DECRYPT, sec, NULL, 0,
speed_template_16);
test_acipher_speed("cbc(sm4)", ENCRYPT, sec, NULL, 0,
speed_template_16);
test_acipher_speed("cbc(sm4)", DECRYPT, sec, NULL, 0,
speed_template_16);
test_acipher_speed("cfb(sm4)", ENCRYPT, sec, NULL, 0,
speed_template_16);
test_acipher_speed("cfb(sm4)", DECRYPT, sec, NULL, 0,
speed_template_16);
test_acipher_speed("ctr(sm4)", ENCRYPT, sec, NULL, 0,
speed_template_16);
test_acipher_speed("ctr(sm4)", DECRYPT, sec, NULL, 0,
speed_template_16);
break;
case 600:
test_mb_skcipher_speed("ecb(aes)", ENCRYPT, sec, NULL, 0,
speed_template_16_24_32, num_mb);

View File

@@ -4450,6 +4450,12 @@ static const struct alg_test_desc alg_test_descs[] = {
.suite = {
.hash = __VECS(aes_cbcmac_tv_template)
}
}, {
.alg = "cbcmac(sm4)",
.test = alg_test_hash,
.suite = {
.hash = __VECS(sm4_cbcmac_tv_template)
}
}, {
.alg = "ccm(aes)",
.generic_driver = "ccm_base(ctr(aes-generic),cbcmac(aes-generic))",
@@ -4461,6 +4467,16 @@ static const struct alg_test_desc alg_test_descs[] = {
.einval_allowed = 1,
}
}
}, {
.alg = "ccm(sm4)",
.generic_driver = "ccm_base(ctr(sm4-generic),cbcmac(sm4-generic))",
.test = alg_test_aead,
.suite = {
.aead = {
____VECS(sm4_ccm_tv_template),
.einval_allowed = 1,
}
}
}, {
.alg = "cfb(aes)",
.test = alg_test_skcipher,
@@ -4494,6 +4510,12 @@ static const struct alg_test_desc alg_test_descs[] = {
.suite = {
.hash = __VECS(des3_ede_cmac64_tv_template)
}
}, {
.alg = "cmac(sm4)",
.test = alg_test_hash,
.suite = {
.hash = __VECS(sm4_cmac128_tv_template)
}
}, {
.alg = "compress_null",
.test = alg_test_null,
@@ -4967,6 +4989,13 @@ static const struct alg_test_desc alg_test_descs[] = {
.suite = {
.aead = __VECS(aes_gcm_tv_template)
}
}, {
.alg = "gcm(sm4)",
.generic_driver = "gcm_base(ctr(sm4-generic),ghash-generic)",
.test = alg_test_aead,
.suite = {
.aead = __VECS(sm4_gcm_tv_template)
}
}, {
.alg = "ghash",
.test = alg_test_hash,

View File

@@ -13328,6 +13328,154 @@ static const struct cipher_testvec sm4_cfb_tv_template[] = {
}
};
static const struct aead_testvec sm4_gcm_tv_template[] = {
{ /* From https://datatracker.ietf.org/doc/html/rfc8998#appendix-A.1 */
.key = "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
"\xFE\xDC\xBA\x98\x76\x54\x32\x10",
.klen = 16,
.iv = "\x00\x00\x12\x34\x56\x78\x00\x00"
"\x00\x00\xAB\xCD",
.ptext = "\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA"
"\xBB\xBB\xBB\xBB\xBB\xBB\xBB\xBB"
"\xCC\xCC\xCC\xCC\xCC\xCC\xCC\xCC"
"\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD"
"\xEE\xEE\xEE\xEE\xEE\xEE\xEE\xEE"
"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF"
"\xEE\xEE\xEE\xEE\xEE\xEE\xEE\xEE"
"\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA",
.plen = 64,
.assoc = "\xFE\xED\xFA\xCE\xDE\xAD\xBE\xEF"
"\xFE\xED\xFA\xCE\xDE\xAD\xBE\xEF"
"\xAB\xAD\xDA\xD2",
.alen = 20,
.ctext = "\x17\xF3\x99\xF0\x8C\x67\xD5\xEE"
"\x19\xD0\xDC\x99\x69\xC4\xBB\x7D"
"\x5F\xD4\x6F\xD3\x75\x64\x89\x06"
"\x91\x57\xB2\x82\xBB\x20\x07\x35"
"\xD8\x27\x10\xCA\x5C\x22\xF0\xCC"
"\xFA\x7C\xBF\x93\xD4\x96\xAC\x15"
"\xA5\x68\x34\xCB\xCF\x98\xC3\x97"
"\xB4\x02\x4A\x26\x91\x23\x3B\x8D"
"\x83\xDE\x35\x41\xE4\xC2\xB5\x81"
"\x77\xE0\x65\xA9\xBF\x7B\x62\xEC",
.clen = 80,
}
};
static const struct aead_testvec sm4_ccm_tv_template[] = {
{ /* From https://datatracker.ietf.org/doc/html/rfc8998#appendix-A.2 */
.key = "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
"\xFE\xDC\xBA\x98\x76\x54\x32\x10",
.klen = 16,
.iv = "\x02\x00\x00\x12\x34\x56\x78\x00"
"\x00\x00\x00\xAB\xCD\x00\x00\x00",
.ptext = "\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA"
"\xBB\xBB\xBB\xBB\xBB\xBB\xBB\xBB"
"\xCC\xCC\xCC\xCC\xCC\xCC\xCC\xCC"
"\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD"
"\xEE\xEE\xEE\xEE\xEE\xEE\xEE\xEE"
"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF"
"\xEE\xEE\xEE\xEE\xEE\xEE\xEE\xEE"
"\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA",
.plen = 64,
.assoc = "\xFE\xED\xFA\xCE\xDE\xAD\xBE\xEF"
"\xFE\xED\xFA\xCE\xDE\xAD\xBE\xEF"
"\xAB\xAD\xDA\xD2",
.alen = 20,
.ctext = "\x48\xAF\x93\x50\x1F\xA6\x2A\xDB"
"\xCD\x41\x4C\xCE\x60\x34\xD8\x95"
"\xDD\xA1\xBF\x8F\x13\x2F\x04\x20"
"\x98\x66\x15\x72\xE7\x48\x30\x94"
"\xFD\x12\xE5\x18\xCE\x06\x2C\x98"
"\xAC\xEE\x28\xD9\x5D\xF4\x41\x6B"
"\xED\x31\xA2\xF0\x44\x76\xC1\x8B"
"\xB4\x0C\x84\xA7\x4B\x97\xDC\x5B"
"\x16\x84\x2D\x4F\xA1\x86\xF5\x6A"
"\xB3\x32\x56\x97\x1F\xA1\x10\xF4",
.clen = 80,
}
};
static const struct hash_testvec sm4_cbcmac_tv_template[] = {
{
.key = "\xff\xee\xdd\xcc\xbb\xaa\x99\x88"
"\x77\x66\x55\x44\x33\x22\x11\x00",
.plaintext = "\x01\x23\x45\x67\x89\xab\xcd\xef"
"\xfe\xdc\xba\x98\x76\x54\x32\x10",
.digest = "\x97\xb4\x75\x8f\x84\x92\x3d\x3f"
"\x86\x81\x0e\x0e\xea\x14\x6d\x73",
.psize = 16,
.ksize = 16,
}, {
.key = "\x01\x23\x45\x67\x89\xab\xcd\xef"
"\xfe\xdc\xBA\x98\x76\x54\x32\x10",
.plaintext = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
"\xbb\xbb\xbb\xbb\xbb\xbb\xbb\xbb"
"\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc"
"\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
"\xee",
.digest = "\xc7\xdb\x17\x71\xa1\x5c\x0d\x22"
"\xa3\x39\x3a\x31\x88\x91\x49\xa1",
.psize = 33,
.ksize = 16,
}, {
.key = "\x01\x23\x45\x67\x89\xab\xcd\xef"
"\xfe\xdc\xBA\x98\x76\x54\x32\x10",
.plaintext = "\xfb\xd1\xbe\x92\x7e\x50\x3f\x16"
"\xf9\xdd\xbe\x91\x73\x53\x37\x1a"
"\xfe\xdd\xba\x97\x7e\x53\x3c\x1c"
"\xfe\xd7\xbf\x9c\x75\x5f\x3e\x11"
"\xf0\xd8\xbc\x96\x73\x5c\x34\x11"
"\xf5\xdb\xb1\x99\x7a\x5a\x32\x1f"
"\xf6\xdf\xb4\x95\x7f\x5f\x3b\x17"
"\xfd\xdb\xb1\x9b\x76\x5c\x37",
.digest = "\x9b\x07\x88\x7f\xd5\x95\x23\x12"
"\x64\x0a\x66\x7f\x4e\x25\xca\xd0",
.psize = 63,
.ksize = 16,
}
};
static const struct hash_testvec sm4_cmac128_tv_template[] = {
{
.key = "\xff\xee\xdd\xcc\xbb\xaa\x99\x88"
"\x77\x66\x55\x44\x33\x22\x11\x00",
.plaintext = "\x01\x23\x45\x67\x89\xab\xcd\xef"
"\xfe\xdc\xba\x98\x76\x54\x32\x10",
.digest = "\x00\xd4\x63\xb4\x9a\xf3\x52\xe2"
"\x74\xa9\x00\x55\x13\x54\x2a\xd1",
.psize = 16,
.ksize = 16,
}, {
.key = "\x01\x23\x45\x67\x89\xab\xcd\xef"
"\xfe\xdc\xBA\x98\x76\x54\x32\x10",
.plaintext = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
"\xbb\xbb\xbb\xbb\xbb\xbb\xbb\xbb"
"\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc"
"\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
"\xee",
.digest = "\x8a\x8a\xe9\xc0\xc8\x97\x0e\x85"
"\x21\x57\x02\x10\x1a\xbf\x9c\xc6",
.psize = 33,
.ksize = 16,
}, {
.key = "\x01\x23\x45\x67\x89\xab\xcd\xef"
"\xfe\xdc\xBA\x98\x76\x54\x32\x10",
.plaintext = "\xfb\xd1\xbe\x92\x7e\x50\x3f\x16"
"\xf9\xdd\xbe\x91\x73\x53\x37\x1a"
"\xfe\xdd\xba\x97\x7e\x53\x3c\x1c"
"\xfe\xd7\xbf\x9c\x75\x5f\x3e\x11"
"\xf0\xd8\xbc\x96\x73\x5c\x34\x11"
"\xf5\xdb\xb1\x99\x7a\x5a\x32\x1f"
"\xf6\xdf\xb4\x95\x7f\x5f\x3b\x17"
"\xfd\xdb\xb1\x9b\x76\x5c\x37",
.digest = "\x5f\x14\xc9\xa9\x20\xb2\xb4\xf0"
"\x76\xe0\xd8\xd6\xdc\x4f\xe1\xbc",
.psize = 63,
.ksize = 16,
}
};
/* Cast6 test vectors from RFC 2612 */
static const struct cipher_testvec cast6_tv_template[] = {
{

View File

@@ -775,7 +775,7 @@ static const u64 rc[WHIRLPOOL_ROUNDS] = {
0xca2dbf07ad5a8333ULL,
};
/**
/*
* The core Whirlpool transform.
*/

Some files were not shown because too many files have changed in this diff Show More