crypto: arm/sha256 - Add optimized SHA-256/224

Add Andy Polyakov's optimized assembly and NEON implementations for
SHA-256/224.

The sha256-armv4.pl script for generating the assembly code is from
OpenSSL commit 51f8d095562f36cdaa6893597b5c609e943b0565.

Compared to sha256-generic these implementations have the following
tcrypt speed improvements on Motorola Nexus 6 (Snapdragon 805):

  bs    b/u      sha256-neon  sha256-asm
  16    16       x1.32        x1.19
  64    16       x1.27        x1.15
  64    64       x1.36        x1.20
  256   16       x1.22        x1.11
  256   64       x1.36        x1.19
  256   256      x1.59        x1.23
  1024  16       x1.21        x1.10
  1024  256      x1.65        x1.23
  1024  1024     x1.76        x1.25
  2048  16       x1.21        x1.10
  2048  256      x1.66        x1.23
  2048  1024     x1.78        x1.25
  2048  2048     x1.79        x1.25
  4096  16       x1.20        x1.09
  4096  256      x1.66        x1.23
  4096  1024     x1.79        x1.26
  4096  4096     x1.82        x1.26
  8192  16       x1.20        x1.09
  8192  256      x1.67        x1.23
  8192  1024     x1.80        x1.26
  8192  4096     x1.85        x1.28
  8192  8192     x1.85        x1.27

Where bs refers to block size and b/u to bytes per update.

Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
Cc: Andy Polyakov <appro@openssl.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
This commit is contained in:
Sami Tolvanen
2015-04-03 18:03:40 +08:00
committed by Herbert Xu
parent 87b1675634
commit f2f770d74a
8 changed files with 3981 additions and 3 deletions
+7
View File
@@ -46,6 +46,13 @@ config CRYPTO_SHA2_ARM_CE
SHA-256 secure hash standard (DFIPS 180-2) implemented
using special ARMv8 Crypto Extensions.
config CRYPTO_SHA256_ARM
tristate "SHA-224/256 digest algorithm (ARM-asm and NEON)"
select CRYPTO_HASH
help
SHA-256 secure hash standard (DFIPS 180-2) implemented
using optimized ARM assembler and NEON, when available.
config CRYPTO_SHA512_ARM_NEON
tristate "SHA384 and SHA512 digest algorithm (ARM NEON)"
depends on KERNEL_MODE_NEON
+7 -1
View File
@@ -7,6 +7,7 @@ obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o
obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o
obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
obj-$(CONFIG_CRYPTO_SHA512_ARM_NEON) += sha512-arm-neon.o
obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o
obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o
@@ -16,6 +17,8 @@ aes-arm-y := aes-armv4.o aes_glue.o
aes-arm-bs-y := aesbs-core.o aesbs-glue.o
sha1-arm-y := sha1-armv4-large.o sha1_glue.o
sha1-arm-neon-y := sha1-armv7-neon.o sha1_neon_glue.o
sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o
sha256-arm-y := sha256-core.o sha256_glue.o $(sha256-arm-neon-y)
sha512-arm-neon-y := sha512-armv7-neon.o sha512_neon_glue.o
sha1-arm-ce-y := sha1-ce-core.o sha1-ce-glue.o
sha2-arm-ce-y := sha2-ce-core.o sha2-ce-glue.o
@@ -28,4 +31,7 @@ quiet_cmd_perl = PERL $@
$(src)/aesbs-core.S_shipped: $(src)/bsaes-armv7.pl
$(call cmd,perl)
.PRECIOUS: $(obj)/aesbs-core.S
$(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl
$(call cmd,perl)
.PRECIOUS: $(obj)/aesbs-core.S $(obj)/sha256-core.S
+2 -2
View File
@@ -163,7 +163,7 @@ static struct shash_alg algs[] = { {
.base = {
.cra_name = "sha224",
.cra_driver_name = "sha224-ce",
.cra_priority = 200,
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA256_BLOCK_SIZE,
.cra_module = THIS_MODULE,
@@ -180,7 +180,7 @@ static struct shash_alg algs[] = { {
.base = {
.cra_name = "sha256",
.cra_driver_name = "sha256-ce",
.cra_priority = 200,
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA256_BLOCK_SIZE,
.cra_module = THIS_MODULE,
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
+246
View File
@@ -0,0 +1,246 @@
/*
* Glue code for the SHA256 Secure Hash Algorithm assembly implementation
* using optimized ARM assembler and NEON instructions.
*
* Copyright © 2015 Google Inc.
*
* This file is based on sha256_ssse3_glue.c:
* Copyright (C) 2013 Intel Corporation
* Author: Tim Chen <tim.c.chen@linux.intel.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
*/
#include <crypto/internal/hash.h>
#include <linux/crypto.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/cryptohash.h>
#include <linux/types.h>
#include <linux/string.h>
#include <crypto/sha.h>
#include <asm/byteorder.h>
#include <asm/simd.h>
#include <asm/neon.h>
#include "sha256_glue.h"
asmlinkage void sha256_block_data_order(u32 *digest, const void *data,
unsigned int num_blks);
int sha256_init(struct shash_desc *desc)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
sctx->state[0] = SHA256_H0;
sctx->state[1] = SHA256_H1;
sctx->state[2] = SHA256_H2;
sctx->state[3] = SHA256_H3;
sctx->state[4] = SHA256_H4;
sctx->state[5] = SHA256_H5;
sctx->state[6] = SHA256_H6;
sctx->state[7] = SHA256_H7;
sctx->count = 0;
return 0;
}
int sha224_init(struct shash_desc *desc)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
sctx->state[0] = SHA224_H0;
sctx->state[1] = SHA224_H1;
sctx->state[2] = SHA224_H2;
sctx->state[3] = SHA224_H3;
sctx->state[4] = SHA224_H4;
sctx->state[5] = SHA224_H5;
sctx->state[6] = SHA224_H6;
sctx->state[7] = SHA224_H7;
sctx->count = 0;
return 0;
}
int __sha256_update(struct shash_desc *desc, const u8 *data, unsigned int len,
unsigned int partial)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
unsigned int done = 0;
sctx->count += len;
if (partial) {
done = SHA256_BLOCK_SIZE - partial;
memcpy(sctx->buf + partial, data, done);
sha256_block_data_order(sctx->state, sctx->buf, 1);
}
if (len - done >= SHA256_BLOCK_SIZE) {
const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE;
sha256_block_data_order(sctx->state, data + done, rounds);
done += rounds * SHA256_BLOCK_SIZE;
}
memcpy(sctx->buf, data + done, len - done);
return 0;
}
int sha256_update(struct shash_desc *desc, const u8 *data, unsigned int len)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
/* Handle the fast case right here */
if (partial + len < SHA256_BLOCK_SIZE) {
sctx->count += len;
memcpy(sctx->buf + partial, data, len);
return 0;
}
return __sha256_update(desc, data, len, partial);
}
/* Add padding and return the message digest. */
static int sha256_final(struct shash_desc *desc, u8 *out)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
unsigned int i, index, padlen;
__be32 *dst = (__be32 *)out;
__be64 bits;
static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, };
/* save number of bits */
bits = cpu_to_be64(sctx->count << 3);
/* Pad out to 56 mod 64 and append length */
index = sctx->count % SHA256_BLOCK_SIZE;
padlen = (index < 56) ? (56 - index) : ((SHA256_BLOCK_SIZE+56)-index);
/* We need to fill a whole block for __sha256_update */
if (padlen <= 56) {
sctx->count += padlen;
memcpy(sctx->buf + index, padding, padlen);
} else {
__sha256_update(desc, padding, padlen, index);
}
__sha256_update(desc, (const u8 *)&bits, sizeof(bits), 56);
/* Store state in digest */
for (i = 0; i < 8; i++)
dst[i] = cpu_to_be32(sctx->state[i]);
/* Wipe context */
memset(sctx, 0, sizeof(*sctx));
return 0;
}
static int sha224_final(struct shash_desc *desc, u8 *out)
{
u8 D[SHA256_DIGEST_SIZE];
sha256_final(desc, D);
memcpy(out, D, SHA224_DIGEST_SIZE);
memzero_explicit(D, SHA256_DIGEST_SIZE);
return 0;
}
int sha256_export(struct shash_desc *desc, void *out)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
memcpy(out, sctx, sizeof(*sctx));
return 0;
}
int sha256_import(struct shash_desc *desc, const void *in)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
memcpy(sctx, in, sizeof(*sctx));
return 0;
}
static struct shash_alg algs[] = { {
.digestsize = SHA256_DIGEST_SIZE,
.init = sha256_init,
.update = sha256_update,
.final = sha256_final,
.export = sha256_export,
.import = sha256_import,
.descsize = sizeof(struct sha256_state),
.statesize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha256",
.cra_driver_name = "sha256-asm",
.cra_priority = 150,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA256_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
}, {
.digestsize = SHA224_DIGEST_SIZE,
.init = sha224_init,
.update = sha256_update,
.final = sha224_final,
.export = sha256_export,
.import = sha256_import,
.descsize = sizeof(struct sha256_state),
.statesize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha224",
.cra_driver_name = "sha224-asm",
.cra_priority = 150,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA224_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
} };
static int __init sha256_mod_init(void)
{
int res = crypto_register_shashes(algs, ARRAY_SIZE(algs));
if (res < 0)
return res;
if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && cpu_has_neon()) {
res = crypto_register_shashes(sha256_neon_algs,
ARRAY_SIZE(sha256_neon_algs));
if (res < 0)
crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
}
return res;
}
static void __exit sha256_mod_fini(void)
{
crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && cpu_has_neon())
crypto_unregister_shashes(sha256_neon_algs,
ARRAY_SIZE(sha256_neon_algs));
}
module_init(sha256_mod_init);
module_exit(sha256_mod_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm (ARM), including NEON");
MODULE_ALIAS_CRYPTO("sha256");
+23
View File
@@ -0,0 +1,23 @@
#ifndef _CRYPTO_SHA256_GLUE_H
#define _CRYPTO_SHA256_GLUE_H
#include <linux/crypto.h>
#include <crypto/sha.h>
extern struct shash_alg sha256_neon_algs[2];
extern int sha256_init(struct shash_desc *desc);
extern int sha224_init(struct shash_desc *desc);
extern int __sha256_update(struct shash_desc *desc, const u8 *data,
unsigned int len, unsigned int partial);
extern int sha256_update(struct shash_desc *desc, const u8 *data,
unsigned int len);
extern int sha256_export(struct shash_desc *desc, void *out);
extern int sha256_import(struct shash_desc *desc, const void *in);
#endif /* _CRYPTO_SHA256_GLUE_H */
+172
View File
@@ -0,0 +1,172 @@
/*
* Glue code for the SHA256 Secure Hash Algorithm assembly implementation
* using NEON instructions.
*
* Copyright © 2015 Google Inc.
*
* This file is based on sha512_neon_glue.c:
* Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
*/
#include <crypto/internal/hash.h>
#include <linux/cryptohash.h>
#include <linux/types.h>
#include <linux/string.h>
#include <crypto/sha.h>
#include <asm/byteorder.h>
#include <asm/simd.h>
#include <asm/neon.h>
#include "sha256_glue.h"
asmlinkage void sha256_block_data_order_neon(u32 *digest, const void *data,
unsigned int num_blks);
static int __sha256_neon_update(struct shash_desc *desc, const u8 *data,
unsigned int len, unsigned int partial)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
unsigned int done = 0;
sctx->count += len;
if (partial) {
done = SHA256_BLOCK_SIZE - partial;
memcpy(sctx->buf + partial, data, done);
sha256_block_data_order_neon(sctx->state, sctx->buf, 1);
}
if (len - done >= SHA256_BLOCK_SIZE) {
const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE;
sha256_block_data_order_neon(sctx->state, data + done, rounds);
done += rounds * SHA256_BLOCK_SIZE;
}
memcpy(sctx->buf, data + done, len - done);
return 0;
}
static int sha256_neon_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
int res;
/* Handle the fast case right here */
if (partial + len < SHA256_BLOCK_SIZE) {
sctx->count += len;
memcpy(sctx->buf + partial, data, len);
return 0;
}
if (!may_use_simd()) {
res = __sha256_update(desc, data, len, partial);
} else {
kernel_neon_begin();
res = __sha256_neon_update(desc, data, len, partial);
kernel_neon_end();
}
return res;
}
/* Add padding and return the message digest. */
static int sha256_neon_final(struct shash_desc *desc, u8 *out)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
unsigned int i, index, padlen;
__be32 *dst = (__be32 *)out;
__be64 bits;
static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, };
/* save number of bits */
bits = cpu_to_be64(sctx->count << 3);
/* Pad out to 56 mod 64 and append length */
index = sctx->count % SHA256_BLOCK_SIZE;
padlen = (index < 56) ? (56 - index) : ((SHA256_BLOCK_SIZE+56)-index);
if (!may_use_simd()) {
sha256_update(desc, padding, padlen);
sha256_update(desc, (const u8 *)&bits, sizeof(bits));
} else {
kernel_neon_begin();
/* We need to fill a whole block for __sha256_neon_update() */
if (padlen <= 56) {
sctx->count += padlen;
memcpy(sctx->buf + index, padding, padlen);
} else {
__sha256_neon_update(desc, padding, padlen, index);
}
__sha256_neon_update(desc, (const u8 *)&bits,
sizeof(bits), 56);
kernel_neon_end();
}
/* Store state in digest */
for (i = 0; i < 8; i++)
dst[i] = cpu_to_be32(sctx->state[i]);
/* Wipe context */
memzero_explicit(sctx, sizeof(*sctx));
return 0;
}
static int sha224_neon_final(struct shash_desc *desc, u8 *out)
{
u8 D[SHA256_DIGEST_SIZE];
sha256_neon_final(desc, D);
memcpy(out, D, SHA224_DIGEST_SIZE);
memzero_explicit(D, SHA256_DIGEST_SIZE);
return 0;
}
struct shash_alg sha256_neon_algs[] = { {
.digestsize = SHA256_DIGEST_SIZE,
.init = sha256_init,
.update = sha256_neon_update,
.final = sha256_neon_final,
.export = sha256_export,
.import = sha256_import,
.descsize = sizeof(struct sha256_state),
.statesize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha256",
.cra_driver_name = "sha256-neon",
.cra_priority = 250,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA256_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
}, {
.digestsize = SHA224_DIGEST_SIZE,
.init = sha224_init,
.update = sha256_neon_update,
.final = sha224_neon_final,
.export = sha256_export,
.import = sha256_import,
.descsize = sizeof(struct sha256_state),
.statesize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha224",
.cra_driver_name = "sha224-neon",
.cra_priority = 250,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA224_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
} };