You've already forked linux-apfs
mirror of
https://github.com/linux-apfs/linux-apfs.git
synced 2026-05-01 15:00:59 -07:00
crypto: arm/sha256 - Add optimized SHA-256/224
Add Andy Polyakov's optimized assembly and NEON implementations for SHA-256/224. The sha256-armv4.pl script for generating the assembly code is from OpenSSL commit 51f8d095562f36cdaa6893597b5c609e943b0565. Compared to sha256-generic these implementations have the following tcrypt speed improvements on Motorola Nexus 6 (Snapdragon 805): bs b/u sha256-neon sha256-asm 16 16 x1.32 x1.19 64 16 x1.27 x1.15 64 64 x1.36 x1.20 256 16 x1.22 x1.11 256 64 x1.36 x1.19 256 256 x1.59 x1.23 1024 16 x1.21 x1.10 1024 256 x1.65 x1.23 1024 1024 x1.76 x1.25 2048 16 x1.21 x1.10 2048 256 x1.66 x1.23 2048 1024 x1.78 x1.25 2048 2048 x1.79 x1.25 4096 16 x1.20 x1.09 4096 256 x1.66 x1.23 4096 1024 x1.79 x1.26 4096 4096 x1.82 x1.26 8192 16 x1.20 x1.09 8192 256 x1.67 x1.23 8192 1024 x1.80 x1.26 8192 4096 x1.85 x1.28 8192 8192 x1.85 x1.27 Where bs refers to block size and b/u to bytes per update. Signed-off-by: Sami Tolvanen <samitolvanen@google.com> Cc: Andy Polyakov <appro@openssl.org> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
This commit is contained in:
committed by
Herbert Xu
parent
87b1675634
commit
f2f770d74a
@@ -46,6 +46,13 @@ config CRYPTO_SHA2_ARM_CE
|
||||
SHA-256 secure hash standard (DFIPS 180-2) implemented
|
||||
using special ARMv8 Crypto Extensions.
|
||||
|
||||
config CRYPTO_SHA256_ARM
|
||||
tristate "SHA-224/256 digest algorithm (ARM-asm and NEON)"
|
||||
select CRYPTO_HASH
|
||||
help
|
||||
SHA-256 secure hash standard (DFIPS 180-2) implemented
|
||||
using optimized ARM assembler and NEON, when available.
|
||||
|
||||
config CRYPTO_SHA512_ARM_NEON
|
||||
tristate "SHA384 and SHA512 digest algorithm (ARM NEON)"
|
||||
depends on KERNEL_MODE_NEON
|
||||
|
||||
@@ -7,6 +7,7 @@ obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o
|
||||
obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
|
||||
obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o
|
||||
obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
|
||||
obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
|
||||
obj-$(CONFIG_CRYPTO_SHA512_ARM_NEON) += sha512-arm-neon.o
|
||||
obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o
|
||||
obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o
|
||||
@@ -16,6 +17,8 @@ aes-arm-y := aes-armv4.o aes_glue.o
|
||||
aes-arm-bs-y := aesbs-core.o aesbs-glue.o
|
||||
sha1-arm-y := sha1-armv4-large.o sha1_glue.o
|
||||
sha1-arm-neon-y := sha1-armv7-neon.o sha1_neon_glue.o
|
||||
sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o
|
||||
sha256-arm-y := sha256-core.o sha256_glue.o $(sha256-arm-neon-y)
|
||||
sha512-arm-neon-y := sha512-armv7-neon.o sha512_neon_glue.o
|
||||
sha1-arm-ce-y := sha1-ce-core.o sha1-ce-glue.o
|
||||
sha2-arm-ce-y := sha2-ce-core.o sha2-ce-glue.o
|
||||
@@ -28,4 +31,7 @@ quiet_cmd_perl = PERL $@
|
||||
$(src)/aesbs-core.S_shipped: $(src)/bsaes-armv7.pl
|
||||
$(call cmd,perl)
|
||||
|
||||
.PRECIOUS: $(obj)/aesbs-core.S
|
||||
$(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl
|
||||
$(call cmd,perl)
|
||||
|
||||
.PRECIOUS: $(obj)/aesbs-core.S $(obj)/sha256-core.S
|
||||
|
||||
@@ -163,7 +163,7 @@ static struct shash_alg algs[] = { {
|
||||
.base = {
|
||||
.cra_name = "sha224",
|
||||
.cra_driver_name = "sha224-ce",
|
||||
.cra_priority = 200,
|
||||
.cra_priority = 300,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
|
||||
.cra_blocksize = SHA256_BLOCK_SIZE,
|
||||
.cra_module = THIS_MODULE,
|
||||
@@ -180,7 +180,7 @@ static struct shash_alg algs[] = { {
|
||||
.base = {
|
||||
.cra_name = "sha256",
|
||||
.cra_driver_name = "sha256-ce",
|
||||
.cra_priority = 200,
|
||||
.cra_priority = 300,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
|
||||
.cra_blocksize = SHA256_BLOCK_SIZE,
|
||||
.cra_module = THIS_MODULE,
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,246 @@
|
||||
/*
|
||||
* Glue code for the SHA256 Secure Hash Algorithm assembly implementation
|
||||
* using optimized ARM assembler and NEON instructions.
|
||||
*
|
||||
* Copyright © 2015 Google Inc.
|
||||
*
|
||||
* This file is based on sha256_ssse3_glue.c:
|
||||
* Copyright (C) 2013 Intel Corporation
|
||||
* Author: Tim Chen <tim.c.chen@linux.intel.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation; either version 2 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <crypto/internal/hash.h>
|
||||
#include <linux/crypto.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/cryptohash.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/string.h>
|
||||
#include <crypto/sha.h>
|
||||
#include <asm/byteorder.h>
|
||||
#include <asm/simd.h>
|
||||
#include <asm/neon.h>
|
||||
#include "sha256_glue.h"
|
||||
|
||||
asmlinkage void sha256_block_data_order(u32 *digest, const void *data,
|
||||
unsigned int num_blks);
|
||||
|
||||
|
||||
int sha256_init(struct shash_desc *desc)
|
||||
{
|
||||
struct sha256_state *sctx = shash_desc_ctx(desc);
|
||||
|
||||
sctx->state[0] = SHA256_H0;
|
||||
sctx->state[1] = SHA256_H1;
|
||||
sctx->state[2] = SHA256_H2;
|
||||
sctx->state[3] = SHA256_H3;
|
||||
sctx->state[4] = SHA256_H4;
|
||||
sctx->state[5] = SHA256_H5;
|
||||
sctx->state[6] = SHA256_H6;
|
||||
sctx->state[7] = SHA256_H7;
|
||||
sctx->count = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int sha224_init(struct shash_desc *desc)
|
||||
{
|
||||
struct sha256_state *sctx = shash_desc_ctx(desc);
|
||||
|
||||
sctx->state[0] = SHA224_H0;
|
||||
sctx->state[1] = SHA224_H1;
|
||||
sctx->state[2] = SHA224_H2;
|
||||
sctx->state[3] = SHA224_H3;
|
||||
sctx->state[4] = SHA224_H4;
|
||||
sctx->state[5] = SHA224_H5;
|
||||
sctx->state[6] = SHA224_H6;
|
||||
sctx->state[7] = SHA224_H7;
|
||||
sctx->count = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int __sha256_update(struct shash_desc *desc, const u8 *data, unsigned int len,
|
||||
unsigned int partial)
|
||||
{
|
||||
struct sha256_state *sctx = shash_desc_ctx(desc);
|
||||
unsigned int done = 0;
|
||||
|
||||
sctx->count += len;
|
||||
|
||||
if (partial) {
|
||||
done = SHA256_BLOCK_SIZE - partial;
|
||||
memcpy(sctx->buf + partial, data, done);
|
||||
sha256_block_data_order(sctx->state, sctx->buf, 1);
|
||||
}
|
||||
|
||||
if (len - done >= SHA256_BLOCK_SIZE) {
|
||||
const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE;
|
||||
|
||||
sha256_block_data_order(sctx->state, data + done, rounds);
|
||||
done += rounds * SHA256_BLOCK_SIZE;
|
||||
}
|
||||
|
||||
memcpy(sctx->buf, data + done, len - done);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int sha256_update(struct shash_desc *desc, const u8 *data, unsigned int len)
|
||||
{
|
||||
struct sha256_state *sctx = shash_desc_ctx(desc);
|
||||
unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
|
||||
|
||||
/* Handle the fast case right here */
|
||||
if (partial + len < SHA256_BLOCK_SIZE) {
|
||||
sctx->count += len;
|
||||
memcpy(sctx->buf + partial, data, len);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
return __sha256_update(desc, data, len, partial);
|
||||
}
|
||||
|
||||
/* Add padding and return the message digest. */
|
||||
static int sha256_final(struct shash_desc *desc, u8 *out)
|
||||
{
|
||||
struct sha256_state *sctx = shash_desc_ctx(desc);
|
||||
unsigned int i, index, padlen;
|
||||
__be32 *dst = (__be32 *)out;
|
||||
__be64 bits;
|
||||
static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, };
|
||||
|
||||
/* save number of bits */
|
||||
bits = cpu_to_be64(sctx->count << 3);
|
||||
|
||||
/* Pad out to 56 mod 64 and append length */
|
||||
index = sctx->count % SHA256_BLOCK_SIZE;
|
||||
padlen = (index < 56) ? (56 - index) : ((SHA256_BLOCK_SIZE+56)-index);
|
||||
|
||||
/* We need to fill a whole block for __sha256_update */
|
||||
if (padlen <= 56) {
|
||||
sctx->count += padlen;
|
||||
memcpy(sctx->buf + index, padding, padlen);
|
||||
} else {
|
||||
__sha256_update(desc, padding, padlen, index);
|
||||
}
|
||||
__sha256_update(desc, (const u8 *)&bits, sizeof(bits), 56);
|
||||
|
||||
/* Store state in digest */
|
||||
for (i = 0; i < 8; i++)
|
||||
dst[i] = cpu_to_be32(sctx->state[i]);
|
||||
|
||||
/* Wipe context */
|
||||
memset(sctx, 0, sizeof(*sctx));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int sha224_final(struct shash_desc *desc, u8 *out)
|
||||
{
|
||||
u8 D[SHA256_DIGEST_SIZE];
|
||||
|
||||
sha256_final(desc, D);
|
||||
|
||||
memcpy(out, D, SHA224_DIGEST_SIZE);
|
||||
memzero_explicit(D, SHA256_DIGEST_SIZE);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int sha256_export(struct shash_desc *desc, void *out)
|
||||
{
|
||||
struct sha256_state *sctx = shash_desc_ctx(desc);
|
||||
|
||||
memcpy(out, sctx, sizeof(*sctx));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int sha256_import(struct shash_desc *desc, const void *in)
|
||||
{
|
||||
struct sha256_state *sctx = shash_desc_ctx(desc);
|
||||
|
||||
memcpy(sctx, in, sizeof(*sctx));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct shash_alg algs[] = { {
|
||||
.digestsize = SHA256_DIGEST_SIZE,
|
||||
.init = sha256_init,
|
||||
.update = sha256_update,
|
||||
.final = sha256_final,
|
||||
.export = sha256_export,
|
||||
.import = sha256_import,
|
||||
.descsize = sizeof(struct sha256_state),
|
||||
.statesize = sizeof(struct sha256_state),
|
||||
.base = {
|
||||
.cra_name = "sha256",
|
||||
.cra_driver_name = "sha256-asm",
|
||||
.cra_priority = 150,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
|
||||
.cra_blocksize = SHA256_BLOCK_SIZE,
|
||||
.cra_module = THIS_MODULE,
|
||||
}
|
||||
}, {
|
||||
.digestsize = SHA224_DIGEST_SIZE,
|
||||
.init = sha224_init,
|
||||
.update = sha256_update,
|
||||
.final = sha224_final,
|
||||
.export = sha256_export,
|
||||
.import = sha256_import,
|
||||
.descsize = sizeof(struct sha256_state),
|
||||
.statesize = sizeof(struct sha256_state),
|
||||
.base = {
|
||||
.cra_name = "sha224",
|
||||
.cra_driver_name = "sha224-asm",
|
||||
.cra_priority = 150,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
|
||||
.cra_blocksize = SHA224_BLOCK_SIZE,
|
||||
.cra_module = THIS_MODULE,
|
||||
}
|
||||
} };
|
||||
|
||||
static int __init sha256_mod_init(void)
|
||||
{
|
||||
int res = crypto_register_shashes(algs, ARRAY_SIZE(algs));
|
||||
|
||||
if (res < 0)
|
||||
return res;
|
||||
|
||||
if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && cpu_has_neon()) {
|
||||
res = crypto_register_shashes(sha256_neon_algs,
|
||||
ARRAY_SIZE(sha256_neon_algs));
|
||||
|
||||
if (res < 0)
|
||||
crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static void __exit sha256_mod_fini(void)
|
||||
{
|
||||
crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
|
||||
|
||||
if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && cpu_has_neon())
|
||||
crypto_unregister_shashes(sha256_neon_algs,
|
||||
ARRAY_SIZE(sha256_neon_algs));
|
||||
}
|
||||
|
||||
module_init(sha256_mod_init);
|
||||
module_exit(sha256_mod_fini);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm (ARM), including NEON");
|
||||
|
||||
MODULE_ALIAS_CRYPTO("sha256");
|
||||
@@ -0,0 +1,23 @@
|
||||
#ifndef _CRYPTO_SHA256_GLUE_H
|
||||
#define _CRYPTO_SHA256_GLUE_H
|
||||
|
||||
#include <linux/crypto.h>
|
||||
#include <crypto/sha.h>
|
||||
|
||||
extern struct shash_alg sha256_neon_algs[2];
|
||||
|
||||
extern int sha256_init(struct shash_desc *desc);
|
||||
|
||||
extern int sha224_init(struct shash_desc *desc);
|
||||
|
||||
extern int __sha256_update(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int len, unsigned int partial);
|
||||
|
||||
extern int sha256_update(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int len);
|
||||
|
||||
extern int sha256_export(struct shash_desc *desc, void *out);
|
||||
|
||||
extern int sha256_import(struct shash_desc *desc, const void *in);
|
||||
|
||||
#endif /* _CRYPTO_SHA256_GLUE_H */
|
||||
@@ -0,0 +1,172 @@
|
||||
/*
|
||||
* Glue code for the SHA256 Secure Hash Algorithm assembly implementation
|
||||
* using NEON instructions.
|
||||
*
|
||||
* Copyright © 2015 Google Inc.
|
||||
*
|
||||
* This file is based on sha512_neon_glue.c:
|
||||
* Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation; either version 2 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <crypto/internal/hash.h>
|
||||
#include <linux/cryptohash.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/string.h>
|
||||
#include <crypto/sha.h>
|
||||
#include <asm/byteorder.h>
|
||||
#include <asm/simd.h>
|
||||
#include <asm/neon.h>
|
||||
#include "sha256_glue.h"
|
||||
|
||||
asmlinkage void sha256_block_data_order_neon(u32 *digest, const void *data,
|
||||
unsigned int num_blks);
|
||||
|
||||
|
||||
static int __sha256_neon_update(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int len, unsigned int partial)
|
||||
{
|
||||
struct sha256_state *sctx = shash_desc_ctx(desc);
|
||||
unsigned int done = 0;
|
||||
|
||||
sctx->count += len;
|
||||
|
||||
if (partial) {
|
||||
done = SHA256_BLOCK_SIZE - partial;
|
||||
memcpy(sctx->buf + partial, data, done);
|
||||
sha256_block_data_order_neon(sctx->state, sctx->buf, 1);
|
||||
}
|
||||
|
||||
if (len - done >= SHA256_BLOCK_SIZE) {
|
||||
const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE;
|
||||
|
||||
sha256_block_data_order_neon(sctx->state, data + done, rounds);
|
||||
done += rounds * SHA256_BLOCK_SIZE;
|
||||
}
|
||||
|
||||
memcpy(sctx->buf, data + done, len - done);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int sha256_neon_update(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int len)
|
||||
{
|
||||
struct sha256_state *sctx = shash_desc_ctx(desc);
|
||||
unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
|
||||
int res;
|
||||
|
||||
/* Handle the fast case right here */
|
||||
if (partial + len < SHA256_BLOCK_SIZE) {
|
||||
sctx->count += len;
|
||||
memcpy(sctx->buf + partial, data, len);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!may_use_simd()) {
|
||||
res = __sha256_update(desc, data, len, partial);
|
||||
} else {
|
||||
kernel_neon_begin();
|
||||
res = __sha256_neon_update(desc, data, len, partial);
|
||||
kernel_neon_end();
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
/* Add padding and return the message digest. */
|
||||
static int sha256_neon_final(struct shash_desc *desc, u8 *out)
|
||||
{
|
||||
struct sha256_state *sctx = shash_desc_ctx(desc);
|
||||
unsigned int i, index, padlen;
|
||||
__be32 *dst = (__be32 *)out;
|
||||
__be64 bits;
|
||||
static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, };
|
||||
|
||||
/* save number of bits */
|
||||
bits = cpu_to_be64(sctx->count << 3);
|
||||
|
||||
/* Pad out to 56 mod 64 and append length */
|
||||
index = sctx->count % SHA256_BLOCK_SIZE;
|
||||
padlen = (index < 56) ? (56 - index) : ((SHA256_BLOCK_SIZE+56)-index);
|
||||
|
||||
if (!may_use_simd()) {
|
||||
sha256_update(desc, padding, padlen);
|
||||
sha256_update(desc, (const u8 *)&bits, sizeof(bits));
|
||||
} else {
|
||||
kernel_neon_begin();
|
||||
/* We need to fill a whole block for __sha256_neon_update() */
|
||||
if (padlen <= 56) {
|
||||
sctx->count += padlen;
|
||||
memcpy(sctx->buf + index, padding, padlen);
|
||||
} else {
|
||||
__sha256_neon_update(desc, padding, padlen, index);
|
||||
}
|
||||
__sha256_neon_update(desc, (const u8 *)&bits,
|
||||
sizeof(bits), 56);
|
||||
kernel_neon_end();
|
||||
}
|
||||
|
||||
/* Store state in digest */
|
||||
for (i = 0; i < 8; i++)
|
||||
dst[i] = cpu_to_be32(sctx->state[i]);
|
||||
|
||||
/* Wipe context */
|
||||
memzero_explicit(sctx, sizeof(*sctx));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int sha224_neon_final(struct shash_desc *desc, u8 *out)
|
||||
{
|
||||
u8 D[SHA256_DIGEST_SIZE];
|
||||
|
||||
sha256_neon_final(desc, D);
|
||||
|
||||
memcpy(out, D, SHA224_DIGEST_SIZE);
|
||||
memzero_explicit(D, SHA256_DIGEST_SIZE);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct shash_alg sha256_neon_algs[] = { {
|
||||
.digestsize = SHA256_DIGEST_SIZE,
|
||||
.init = sha256_init,
|
||||
.update = sha256_neon_update,
|
||||
.final = sha256_neon_final,
|
||||
.export = sha256_export,
|
||||
.import = sha256_import,
|
||||
.descsize = sizeof(struct sha256_state),
|
||||
.statesize = sizeof(struct sha256_state),
|
||||
.base = {
|
||||
.cra_name = "sha256",
|
||||
.cra_driver_name = "sha256-neon",
|
||||
.cra_priority = 250,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
|
||||
.cra_blocksize = SHA256_BLOCK_SIZE,
|
||||
.cra_module = THIS_MODULE,
|
||||
}
|
||||
}, {
|
||||
.digestsize = SHA224_DIGEST_SIZE,
|
||||
.init = sha224_init,
|
||||
.update = sha256_neon_update,
|
||||
.final = sha224_neon_final,
|
||||
.export = sha256_export,
|
||||
.import = sha256_import,
|
||||
.descsize = sizeof(struct sha256_state),
|
||||
.statesize = sizeof(struct sha256_state),
|
||||
.base = {
|
||||
.cra_name = "sha224",
|
||||
.cra_driver_name = "sha224-neon",
|
||||
.cra_priority = 250,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
|
||||
.cra_blocksize = SHA224_BLOCK_SIZE,
|
||||
.cra_module = THIS_MODULE,
|
||||
}
|
||||
} };
|
||||
Reference in New Issue
Block a user