crypto: arm64/sm4 - add CE implementation for GCM mode
This patch adds a CE-optimized assembly implementation of GCM mode.

Benchmarked on a T-Head Yitian-710 at 2.75 GHz; the data comes from the 224 and 224 modes of tcrypt and compares performance before and after this patch (the driver used before this patch is gcm_base(ctr-sm4-ce,ghash-generic)). The columns are block lengths in bytes; throughput is in Mb/s:

Before (gcm_base(ctr-sm4-ce,ghash-generic)):

gcm(sm4)     |      16       64      256      512     1024     1420     4096     8192
-------------+-----------------------------------------------------------------------
  GCM enc    |   25.24    64.65   104.66   116.69   123.81   125.12   129.67   130.62
  GCM dec    |   25.40    64.80   104.74   116.70   123.81   125.21   129.68   130.59
  GCM mb enc |   24.95    64.06   104.20   116.38   123.55   124.97   129.63   130.61
  GCM mb dec |   24.92    64.00   104.13   116.34   123.55   124.98   129.56   130.48

After:

gcm-sm4-ce   |      16       64      256      512     1024     1420     4096     8192
-------------+-----------------------------------------------------------------------
  GCM enc    |  108.62   397.18   971.60  1283.92  1522.77  1513.39  1777.00  1806.96
  GCM dec    |  116.36   398.14  1004.27  1319.11  1624.21  1635.43  1932.54  1974.20
  GCM mb enc |  107.13   391.79   962.05  1274.94  1514.76  1508.57  1769.07  1801.58
  GCM mb dec |  113.40   389.36   988.51  1307.68  1619.10  1631.55  1931.70  1970.86

Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Committed by: Herbert Xu
Parent: 67fa3a7fdf
Commit: ae1b83c7d5
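For reference, the primitive the new PMULL code accelerates is GHASH, the GF(2^128) universal hash of NIST SP 800-38D. In standard GCM notation (not taken from this patch), with hash key H and the zero-padded AAD A and ciphertext C split into 128-bit blocks X_1..X_m:

    H = E_K(0^{128}), \qquad Y_0 = 0^{128}, \qquad Y_i = (Y_{i-1} \oplus X_i) \cdot H

    T = \mathrm{MSB}_t\!\left( E_K(J_0) \oplus \mathrm{GHASH}_H\!\left(A \,\|\, C \,\|\, [\mathrm{len}(A)]_{64} \,\|\, [\mathrm{len}(C)]_{64}\right) \right)

where J_0 = IV || 0^{31} || 1 for the 96-bit IVs used here. The PMULL instructions implement the carry-less multiplication by H.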
arch/arm64/crypto/Kconfig:

@@ -297,6 +297,22 @@ config CRYPTO_SM4_ARM64_CE_CCM
 	    - ARMv8 Crypto Extensions
 	    - NEON (Advanced SIMD) extensions
 
+config CRYPTO_SM4_ARM64_CE_GCM
+	tristate "AEAD cipher: SM4 in GCM mode (ARMv8 Crypto Extensions)"
+	depends on KERNEL_MODE_NEON
+	select CRYPTO_ALGAPI
+	select CRYPTO_AEAD
+	select CRYPTO_SM4
+	select CRYPTO_SM4_ARM64_CE_BLK
+	help
+	  AEAD cipher: SM4 cipher algorithms (OSCCA GB/T 32907-2016) with
+	  GCM (Galois/Counter Mode) authenticated encryption mode (NIST SP800-38D)
+
+	  Architecture: arm64 using:
+	  - ARMv8 Crypto Extensions
+	  - PMULL (Polynomial Multiply Long) instructions
+	  - NEON (Advanced SIMD) extensions
+
 config CRYPTO_CRCT10DIF_ARM64_CE
 	tristate "CRCT10DIF (PMULL)"
 	depends on KERNEL_MODE_NEON && CRC_T10DIF
arch/arm64/crypto/Makefile:

@@ -32,6 +32,9 @@ sm4-ce-y := sm4-ce-glue.o sm4-ce-core.o
 obj-$(CONFIG_CRYPTO_SM4_ARM64_CE_CCM) += sm4-ce-ccm.o
 sm4-ce-ccm-y := sm4-ce-ccm-glue.o sm4-ce-ccm-core.o
 
+obj-$(CONFIG_CRYPTO_SM4_ARM64_CE_GCM) += sm4-ce-gcm.o
+sm4-ce-gcm-y := sm4-ce-gcm-glue.o sm4-ce-gcm-core.o
+
 obj-$(CONFIG_CRYPTO_SM4_ARM64_NEON_BLK) += sm4-neon.o
 sm4-neon-y := sm4-neon-glue.o sm4-neon-core.o
arch/arm64/crypto/sm4-ce-gcm-core.S (new file, 741 lines)

File diff suppressed because it is too large.
arch/arm64/crypto/sm4-ce-gcm-glue.c (new file, 286 lines)

@@ -0,0 +1,286 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * SM4-GCM AEAD Algorithm using ARMv8 Crypto Extensions
 * as specified in rfc8998
 * https://datatracker.ietf.org/doc/html/rfc8998
 *
 * Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
 */

#include <linux/module.h>
#include <linux/crypto.h>
#include <linux/kernel.h>
#include <linux/cpufeature.h>
#include <asm/neon.h>
#include <crypto/b128ops.h>
#include <crypto/scatterwalk.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/skcipher.h>
#include <crypto/sm4.h>
#include "sm4-ce.h"

asmlinkage void sm4_ce_pmull_ghash_setup(const u32 *rkey_enc, u8 *ghash_table);
asmlinkage void pmull_ghash_update(const u8 *ghash_table, u8 *ghash,
                                   const u8 *src, unsigned int nblocks);
asmlinkage void sm4_ce_pmull_gcm_enc(const u32 *rkey_enc, u8 *dst,
                                     const u8 *src, u8 *iv,
                                     unsigned int nbytes, u8 *ghash,
                                     const u8 *ghash_table, const u8 *lengths);
asmlinkage void sm4_ce_pmull_gcm_dec(const u32 *rkey_enc, u8 *dst,
                                     const u8 *src, u8 *iv,
                                     unsigned int nbytes, u8 *ghash,
                                     const u8 *ghash_table, const u8 *lengths);
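/*
 * The bulk enc/dec helpers fold full blocks into the running GHASH state as
 * they encrypt or decrypt; `lengths` is non-NULL only on the final call, in
 * which case the assembly also absorbs the encoded AAD/text bit lengths and
 * finalizes the authentication tag into `ghash`.
 */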

#define GHASH_BLOCK_SIZE        16
#define GCM_IV_SIZE             12

struct sm4_gcm_ctx {
        struct sm4_ctx key;
        u8 ghash_table[16 * 4];
};

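/*
 * Expand the SM4 round keys and derive the GHASH key table from them; both
 * helpers use the CE/NEON register file, so they run inside
 * kernel_neon_begin()/kernel_neon_end().
 */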
static int gcm_setkey(struct crypto_aead *tfm, const u8 *key,
                      unsigned int key_len)
{
        struct sm4_gcm_ctx *ctx = crypto_aead_ctx(tfm);

        if (key_len != SM4_KEY_SIZE)
                return -EINVAL;

        kernel_neon_begin();

        sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
                          crypto_sm4_fk, crypto_sm4_ck);
        sm4_ce_pmull_ghash_setup(ctx->key.rkey_enc, ctx->ghash_table);

        kernel_neon_end();
        return 0;
}

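/*
 * GCM permits tag lengths of 96 to 128 bits, plus the truncated 64- and
 * 32-bit tags allowed by NIST SP 800-38D appendix C: 4, 8 and 12..16
 * bytes here.
 */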
static int gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
{
        switch (authsize) {
        case 4:
        case 8:
        case 12 ... 16:
                return 0;
        default:
                return -EINVAL;
        }
}

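/*
 * Fold the associated data into the GHASH state.  The AAD scatterlist is
 * walked entry by entry; partial blocks are buffered so that
 * pmull_ghash_update() only ever sees whole 16-byte blocks, and a trailing
 * partial block is zero-padded as GCM requires.
 */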
static void gcm_calculate_auth_mac(struct aead_request *req, u8 ghash[])
{
        struct crypto_aead *aead = crypto_aead_reqtfm(req);
        struct sm4_gcm_ctx *ctx = crypto_aead_ctx(aead);
        u8 __aligned(8) buffer[GHASH_BLOCK_SIZE];
        u32 assoclen = req->assoclen;
        struct scatter_walk walk;
        unsigned int buflen = 0;

        scatterwalk_start(&walk, req->src);

        do {
                u32 n = scatterwalk_clamp(&walk, assoclen);
                u8 *p, *ptr;

                if (!n) {
                        scatterwalk_start(&walk, sg_next(walk.sg));
                        n = scatterwalk_clamp(&walk, assoclen);
                }

                p = ptr = scatterwalk_map(&walk);
                assoclen -= n;
                scatterwalk_advance(&walk, n);

                if (n + buflen < GHASH_BLOCK_SIZE) {
                        memcpy(&buffer[buflen], ptr, n);
                        buflen += n;
                } else {
                        unsigned int nblocks;

                        if (buflen) {
                                unsigned int l = GHASH_BLOCK_SIZE - buflen;

                                memcpy(&buffer[buflen], ptr, l);
                                ptr += l;
                                n -= l;

                                pmull_ghash_update(ctx->ghash_table, ghash,
                                                   buffer, 1);
                        }

                        nblocks = n / GHASH_BLOCK_SIZE;
                        if (nblocks) {
                                pmull_ghash_update(ctx->ghash_table, ghash,
                                                   ptr, nblocks);
                                ptr += nblocks * GHASH_BLOCK_SIZE;
                        }

                        buflen = n % GHASH_BLOCK_SIZE;
                        if (buflen)
                                memcpy(&buffer[0], ptr, buflen);
                }

                scatterwalk_unmap(p);
                scatterwalk_done(&walk, 0, assoclen);
        } while (assoclen);

        /* pad the final partial block with zeros */
        if (buflen) {
                memset(&buffer[buflen], 0, GHASH_BLOCK_SIZE - buflen);
                pmull_ghash_update(ctx->ghash_table, ghash, buffer, 1);
        }
}

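/*
 * Shared bulk path for encryption and decryption.  The skcipher walk hands
 * the assembly block-aligned chunks; only the final chunk is passed together
 * with the encoded bit lengths, so the tag is finalized in the same call.
 */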
static int gcm_crypt(struct aead_request *req, struct skcipher_walk *walk,
                     struct sm4_gcm_ctx *ctx, u8 ghash[],
                     void (*sm4_ce_pmull_gcm_crypt)(const u32 *rkey_enc,
                                u8 *dst, const u8 *src, u8 *iv,
                                unsigned int nbytes, u8 *ghash,
                                const u8 *ghash_table, const u8 *lengths))
{
        u8 __aligned(8) iv[SM4_BLOCK_SIZE];
        be128 __aligned(8) lengths;
        int err;

        memset(ghash, 0, SM4_BLOCK_SIZE);

        lengths.a = cpu_to_be64(req->assoclen * 8);
        lengths.b = cpu_to_be64(walk->total * 8);

        memcpy(iv, walk->iv, GCM_IV_SIZE);
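        /*
         * Counter block 1 (J0 = IV || 0^31 || 1) is reserved for encrypting
         * the final GHASH value into the tag, so bulk CTR processing starts
         * at counter value 2.
         */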
        put_unaligned_be32(2, iv + GCM_IV_SIZE);

        kernel_neon_begin();

        if (req->assoclen)
                gcm_calculate_auth_mac(req, ghash);

        do {
                unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE;
                const u8 *src = walk->src.virt.addr;
                u8 *dst = walk->dst.virt.addr;

                if (walk->nbytes == walk->total) {
                        tail = 0;

                        sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, dst, src, iv,
                                               walk->nbytes, ghash,
                                               ctx->ghash_table,
                                               (const u8 *)&lengths);
                } else if (walk->nbytes - tail) {
                        sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, dst, src, iv,
                                               walk->nbytes - tail, ghash,
                                               ctx->ghash_table, NULL);
                }

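                /*
                 * The walk was started with atomic=false, so
                 * skcipher_walk_done() may sleep; release the NEON context
                 * around it and re-acquire it for the next chunk.
                 */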
                kernel_neon_end();

                err = skcipher_walk_done(walk, tail);
                if (err)
                        return err;
                if (walk->nbytes)
                        kernel_neon_begin();
        } while (walk->nbytes > 0);

        return 0;
}

static int gcm_encrypt(struct aead_request *req)
{
        struct crypto_aead *aead = crypto_aead_reqtfm(req);
        struct sm4_gcm_ctx *ctx = crypto_aead_ctx(aead);
        u8 __aligned(8) ghash[SM4_BLOCK_SIZE];
        struct skcipher_walk walk;
        int err;

        err = skcipher_walk_aead_encrypt(&walk, req, false);
        if (err)
                return err;

        err = gcm_crypt(req, &walk, ctx, ghash, sm4_ce_pmull_gcm_enc);
        if (err)
                return err;

        /* copy authtag to end of dst */
        scatterwalk_map_and_copy(ghash, req->dst, req->assoclen + req->cryptlen,
                                 crypto_aead_authsize(aead), 1);

        return 0;
}

static int gcm_decrypt(struct aead_request *req)
{
        struct crypto_aead *aead = crypto_aead_reqtfm(req);
        unsigned int authsize = crypto_aead_authsize(aead);
        struct sm4_gcm_ctx *ctx = crypto_aead_ctx(aead);
        u8 __aligned(8) ghash[SM4_BLOCK_SIZE];
        u8 authtag[SM4_BLOCK_SIZE];
        struct skcipher_walk walk;
        int err;

        err = skcipher_walk_aead_decrypt(&walk, req, false);
        if (err)
                return err;

        err = gcm_crypt(req, &walk, ctx, ghash, sm4_ce_pmull_gcm_dec);
        if (err)
                return err;

        /* compare calculated auth tag with the stored one */
        scatterwalk_map_and_copy(authtag, req->src,
                                 req->assoclen + req->cryptlen - authsize,
                                 authsize, 0);

        if (crypto_memneq(authtag, ghash, authsize))
                return -EBADMSG;

        return 0;
}

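/*
 * cra_priority 400 outranks the generic gcm_base(ctr-sm4-ce,ghash-generic)
 * composition, so this driver is preferred for "gcm(sm4)" once registered.
 */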
static struct aead_alg sm4_gcm_alg = {
        .base = {
                .cra_name               = "gcm(sm4)",
                .cra_driver_name        = "gcm-sm4-ce",
                .cra_priority           = 400,
                .cra_blocksize          = 1,
                .cra_ctxsize            = sizeof(struct sm4_gcm_ctx),
                .cra_module             = THIS_MODULE,
        },
        .ivsize         = GCM_IV_SIZE,
        .chunksize      = SM4_BLOCK_SIZE,
        .maxauthsize    = SM4_BLOCK_SIZE,
        .setkey         = gcm_setkey,
        .setauthsize    = gcm_setauthsize,
        .encrypt        = gcm_encrypt,
        .decrypt        = gcm_decrypt,
};

static int __init sm4_ce_gcm_init(void)
{
        if (!cpu_have_named_feature(PMULL))
                return -ENODEV;

        return crypto_register_aead(&sm4_gcm_alg);
}

static void __exit sm4_ce_gcm_exit(void)
{
        crypto_unregister_aead(&sm4_gcm_alg);
}

static const struct cpu_feature sm4_ce_gcm_cpu_feature[] = {
        { cpu_feature(PMULL) },
        {}
};
MODULE_DEVICE_TABLE(cpu, sm4_ce_gcm_cpu_feature);

module_cpu_feature_match(SM4, sm4_ce_gcm_init);
module_exit(sm4_ce_gcm_exit);

MODULE_DESCRIPTION("Synchronous SM4 in GCM mode using ARMv8 Crypto Extensions");
MODULE_ALIAS_CRYPTO("gcm(sm4)");
MODULE_AUTHOR("Tianjia Zhang <tianjia.zhang@linux.alibaba.com>");
MODULE_LICENSE("GPL v2");
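Once registered, the driver is reached through the normal in-kernel AEAD API. Below is a minimal, hypothetical sketch of a one-shot "gcm(sm4)" encryption; the demo module name, the all-zero key/IV, and the buffer sizes are illustrative assumptions, not part of this patch:

/* sm4_gcm_demo.c - illustrative sketch only */
#include <crypto/aead.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>

static int __init sm4_gcm_demo_init(void)
{
        u8 key[16] = {};                /* 128-bit SM4 key (zero, demo only) */
        u8 iv[12] = {};                 /* 96-bit GCM IV (zero, demo only) */
        struct crypto_aead *tfm;
        struct aead_request *req;
        struct scatterlist sg;
        DECLARE_CRYPTO_WAIT(wait);
        u8 *buf;
        int err;

        tfm = crypto_alloc_aead("gcm(sm4)", 0, 0);
        if (IS_ERR(tfm))
                return PTR_ERR(tfm);

        err = crypto_aead_setkey(tfm, key, sizeof(key));
        if (err)
                goto out_free_tfm;
        err = crypto_aead_setauthsize(tfm, 16);
        if (err)
                goto out_free_tfm;

        /* 32 bytes of plaintext followed by room for the 16-byte tag */
        buf = kzalloc(32 + 16, GFP_KERNEL);
        if (!buf) {
                err = -ENOMEM;
                goto out_free_tfm;
        }
        sg_init_one(&sg, buf, 32 + 16);

        req = aead_request_alloc(tfm, GFP_KERNEL);
        if (!req) {
                err = -ENOMEM;
                goto out_free_buf;
        }
        aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP,
                                  crypto_req_done, &wait);
        aead_request_set_ad(req, 0);            /* no associated data */
        aead_request_set_crypt(req, &sg, &sg, 32, iv);

        /* ciphertext replaces buf[0..31], tag lands in buf[32..47] */
        err = crypto_wait_req(crypto_aead_encrypt(req), &wait);

        aead_request_free(req);
out_free_buf:
        kfree(buf);
out_free_tfm:
        crypto_free_aead(tfm);
        return err;
}
module_init(sm4_gcm_demo_init);
MODULE_LICENSE("GPL");

With the CE driver loaded, the "gcm(sm4)" lookup resolves to gcm-sm4-ce rather than the gcm_base() composition, so the same caller code transparently picks up the accelerated path.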