Backported ARM/NEON optimized crypto routines from kernel 3.13.y
@@ -2501,6 +2501,13 @@ config NEON
	  Say Y to include support code for NEON, the ARMv7 Advanced SIMD
	  Extension.

config KERNEL_MODE_NEON
	bool "Support for NEON in kernel mode"
	default n
	depends on NEON
	help
	  Say Y to include support for NEON in kernel mode.

endmenu

menu "Userspace binary formats"

@@ -100,7 +100,7 @@ tune-$(CONFIG_CPU_V6) :=$(call cc-option,-mtune=arm1136j-s,-mtune=strongarm)
tune-$(CONFIG_CPU_V6K) :=$(call cc-option,-mtune=arm1136j-s,-mtune=strongarm)

ifeq ($(CONFIG_AEABI),y)
CFLAGS_ABI :=-mabi=aapcs-linux -mno-thumb-interwork
CFLAGS_ABI :=-mabi=aapcs-linux -mno-thumb-interwork -mfpu=vfp
else
CFLAGS_ABI :=$(call cc-option,-mapcs-32,-mabi=apcs-gnu) $(call cc-option,-mno-thumb-interwork,)
endif
@@ -118,6 +118,9 @@ AFLAGS_THUMB2 :=$(CFLAGS_THUMB2) -Wa$(comma)-mthumb
ifeq ($(CONFIG_THUMB2_AVOID_R_ARM_THM_JUMP11),y)
CFLAGS_MODULE +=-fno-optimize-sibling-calls
endif
else
CFLAGS_ISA :=$(call cc-option,-marm,)
AFLAGS_ISA :=$(CFLAGS_ISA)
endif

# Need -Uarm for gcc < 3.x
@@ -263,6 +266,7 @@ core-$(CONFIG_VFP) += arch/arm/vfp/
# If we have a machine-specific directory, then include it in the build.
core-y += arch/arm/kernel/ arch/arm/mm/ arch/arm/common/
core-y += arch/arm/net/
core-y += arch/arm/crypto/
core-y += $(machdirs) $(platdirs)

drivers-$(CONFIG_OPROFILE) += arch/arm/oprofile/

arch/arm/crypto/Makefile (new file, 11 lines)
@@ -0,0 +1,11 @@
#
# Arch-specific CryptoAPI modules.
#

obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o
obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o
obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o

aes-arm-y := aes-armv4.o aes_glue.o
aes-arm-bs-y := aesbs-core.o aesbs-glue.o
sha1-arm-y := sha1-armv4-large.o sha1_glue.o
arch/arm/crypto/aes-armv4.S (new file, 1088 lines; diff suppressed because it is too large)
arch/arm/crypto/aes_glue.c (new file, 98 lines)
@@ -0,0 +1,98 @@
|
||||
/*
|
||||
* Glue Code for the asm optimized version of the AES Cipher Algorithm
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/crypto.h>
|
||||
#include <crypto/aes.h>
|
||||
|
||||
#include "aes_glue.h"
|
||||
|
||||
EXPORT_SYMBOL(AES_encrypt);
|
||||
EXPORT_SYMBOL(AES_decrypt);
|
||||
EXPORT_SYMBOL(private_AES_set_encrypt_key);
|
||||
EXPORT_SYMBOL(private_AES_set_decrypt_key);
|
||||
|
||||
static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
|
||||
{
|
||||
struct AES_CTX *ctx = crypto_tfm_ctx(tfm);
|
||||
AES_encrypt(src, dst, &ctx->enc_key);
|
||||
}
|
||||
|
||||
static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
|
||||
{
|
||||
struct AES_CTX *ctx = crypto_tfm_ctx(tfm);
|
||||
AES_decrypt(src, dst, &ctx->dec_key);
|
||||
}
|
||||
|
||||
static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
|
||||
unsigned int key_len)
|
||||
{
|
||||
struct AES_CTX *ctx = crypto_tfm_ctx(tfm);
|
||||
|
||||
switch (key_len) {
|
||||
case AES_KEYSIZE_128:
|
||||
key_len = 128;
|
||||
break;
|
||||
case AES_KEYSIZE_192:
|
||||
key_len = 192;
|
||||
break;
|
||||
case AES_KEYSIZE_256:
|
||||
key_len = 256;
|
||||
break;
|
||||
default:
|
||||
tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (private_AES_set_encrypt_key(in_key, key_len, &ctx->enc_key) == -1) {
|
||||
tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
|
||||
return -EINVAL;
|
||||
}
|
||||
/* private_AES_set_decrypt_key expects an encryption key as input */
|
||||
ctx->dec_key = ctx->enc_key;
|
||||
if (private_AES_set_decrypt_key(in_key, key_len, &ctx->dec_key) == -1) {
|
||||
tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
|
||||
return -EINVAL;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct crypto_alg aes_alg = {
|
||||
.cra_name = "aes",
|
||||
.cra_driver_name = "aes-asm",
|
||||
.cra_priority = 200,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
|
||||
.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct AES_CTX),
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_list = LIST_HEAD_INIT(aes_alg.cra_list),
|
||||
.cra_u = {
|
||||
.cipher = {
|
||||
.cia_min_keysize = AES_MIN_KEY_SIZE,
|
||||
.cia_max_keysize = AES_MAX_KEY_SIZE,
|
||||
.cia_setkey = aes_set_key,
|
||||
.cia_encrypt = aes_encrypt,
|
||||
.cia_decrypt = aes_decrypt
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
static int __init aes_init(void)
|
||||
{
|
||||
return crypto_register_alg(&aes_alg);
|
||||
}
|
||||
|
||||
static void __exit aes_fini(void)
|
||||
{
|
||||
crypto_unregister_alg(&aes_alg);
|
||||
}
|
||||
|
||||
module_init(aes_init);
|
||||
module_exit(aes_fini);
|
||||
|
||||
MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm (ASM)");
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_ALIAS("aes");
|
||||
MODULE_ALIAS("aes-asm");
|
||||
MODULE_AUTHOR("David McCullough <ucdevel@gmail.com>");
|
||||
arch/arm/crypto/aes_glue.h (new file, 19 lines)
@@ -0,0 +1,19 @@

#define AES_MAXNR 14

struct AES_KEY {
	unsigned int rd_key[4 * (AES_MAXNR + 1)];
	int rounds;
};

struct AES_CTX {
	struct AES_KEY enc_key;
	struct AES_KEY dec_key;
};

asmlinkage void AES_encrypt(const u8 *in, u8 *out, struct AES_KEY *ctx);
asmlinkage void AES_decrypt(const u8 *in, u8 *out, struct AES_KEY *ctx);
asmlinkage int private_AES_set_decrypt_key(const unsigned char *userKey,
					   const int bits, struct AES_KEY *key);
asmlinkage int private_AES_set_encrypt_key(const unsigned char *userKey,
					   const int bits, struct AES_KEY *key);
arch/arm/crypto/aesbs-core.S (new file, 2532 lines; diff suppressed because it is too large)
arch/arm/crypto/aesbs-glue.c (new file, 434 lines)
@@ -0,0 +1,434 @@
|
||||
/*
|
||||
* linux/arch/arm/crypto/aesbs-glue.c - glue code for NEON bit sliced AES
|
||||
*
|
||||
* Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <asm/neon.h>
|
||||
#include <crypto/aes.h>
|
||||
#include <crypto/ablk_helper.h>
|
||||
#include <crypto/algapi.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
#include "aes_glue.h"
|
||||
|
||||
#define BIT_SLICED_KEY_MAXSIZE (128 * (AES_MAXNR - 1) + 2 * AES_BLOCK_SIZE)
|
||||
|
||||
struct BS_KEY {
|
||||
struct AES_KEY rk;
|
||||
int converted;
|
||||
u8 __aligned(8) bs[BIT_SLICED_KEY_MAXSIZE];
|
||||
} __aligned(8);
|
||||
|
||||
asmlinkage void bsaes_enc_key_convert(u8 out[], struct AES_KEY const *in);
|
||||
asmlinkage void bsaes_dec_key_convert(u8 out[], struct AES_KEY const *in);
|
||||
|
||||
asmlinkage void bsaes_cbc_encrypt(u8 const in[], u8 out[], u32 bytes,
|
||||
struct BS_KEY *key, u8 iv[]);
|
||||
|
||||
asmlinkage void bsaes_ctr32_encrypt_blocks(u8 const in[], u8 out[], u32 blocks,
|
||||
struct BS_KEY *key, u8 const iv[]);
|
||||
|
||||
asmlinkage void bsaes_xts_encrypt(u8 const in[], u8 out[], u32 bytes,
|
||||
struct BS_KEY *key, u8 tweak[]);
|
||||
|
||||
asmlinkage void bsaes_xts_decrypt(u8 const in[], u8 out[], u32 bytes,
|
||||
struct BS_KEY *key, u8 tweak[]);
|
||||
|
||||
struct aesbs_cbc_ctx {
|
||||
struct AES_KEY enc;
|
||||
struct BS_KEY dec;
|
||||
};
|
||||
|
||||
struct aesbs_ctr_ctx {
|
||||
struct BS_KEY enc;
|
||||
};
|
||||
|
||||
struct aesbs_xts_ctx {
|
||||
struct BS_KEY enc;
|
||||
struct BS_KEY dec;
|
||||
struct AES_KEY twkey;
|
||||
};
|
||||
|
||||
static int aesbs_cbc_set_key(struct crypto_tfm *tfm, const u8 *in_key,
|
||||
unsigned int key_len)
|
||||
{
|
||||
struct aesbs_cbc_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
int bits = key_len * 8;
|
||||
|
||||
if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc)) {
|
||||
tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
|
||||
return -EINVAL;
|
||||
}
|
||||
ctx->dec.rk = ctx->enc;
|
||||
private_AES_set_decrypt_key(in_key, bits, &ctx->dec.rk);
|
||||
ctx->dec.converted = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int aesbs_ctr_set_key(struct crypto_tfm *tfm, const u8 *in_key,
|
||||
unsigned int key_len)
|
||||
{
|
||||
struct aesbs_ctr_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
int bits = key_len * 8;
|
||||
|
||||
if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc.rk)) {
|
||||
tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
|
||||
return -EINVAL;
|
||||
}
|
||||
ctx->enc.converted = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int aesbs_xts_set_key(struct crypto_tfm *tfm, const u8 *in_key,
|
||||
unsigned int key_len)
|
||||
{
|
||||
struct aesbs_xts_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
int bits = key_len * 4;
|
||||
|
||||
if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc.rk)) {
|
||||
tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
|
||||
return -EINVAL;
|
||||
}
|
||||
ctx->dec.rk = ctx->enc.rk;
|
||||
private_AES_set_decrypt_key(in_key, bits, &ctx->dec.rk);
|
||||
private_AES_set_encrypt_key(in_key + key_len / 2, bits, &ctx->twkey);
|
||||
ctx->enc.converted = ctx->dec.converted = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int aesbs_cbc_encrypt(struct blkcipher_desc *desc,
|
||||
struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
struct aesbs_cbc_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
|
||||
struct blkcipher_walk walk;
|
||||
int err;
|
||||
|
||||
blkcipher_walk_init(&walk, dst, src, nbytes);
|
||||
err = blkcipher_walk_virt(desc, &walk);
|
||||
|
||||
while (walk.nbytes) {
|
||||
u32 blocks = walk.nbytes / AES_BLOCK_SIZE;
|
||||
u8 *src = walk.src.virt.addr;
|
||||
|
||||
if (walk.dst.virt.addr == walk.src.virt.addr) {
|
||||
u8 *iv = walk.iv;
|
||||
|
||||
do {
|
||||
crypto_xor(src, iv, AES_BLOCK_SIZE);
|
||||
AES_encrypt(src, src, &ctx->enc);
|
||||
iv = src;
|
||||
src += AES_BLOCK_SIZE;
|
||||
} while (--blocks);
|
||||
memcpy(walk.iv, iv, AES_BLOCK_SIZE);
|
||||
} else {
|
||||
u8 *dst = walk.dst.virt.addr;
|
||||
|
||||
do {
|
||||
crypto_xor(walk.iv, src, AES_BLOCK_SIZE);
|
||||
AES_encrypt(walk.iv, dst, &ctx->enc);
|
||||
memcpy(walk.iv, dst, AES_BLOCK_SIZE);
|
||||
src += AES_BLOCK_SIZE;
|
||||
dst += AES_BLOCK_SIZE;
|
||||
} while (--blocks);
|
||||
}
|
||||
err = blkcipher_walk_done(desc, &walk, 0);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
static int aesbs_cbc_decrypt(struct blkcipher_desc *desc,
|
||||
struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
struct aesbs_cbc_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
|
||||
struct blkcipher_walk walk;
|
||||
int err;
|
||||
|
||||
blkcipher_walk_init(&walk, dst, src, nbytes);
|
||||
err = blkcipher_walk_virt_block(desc, &walk, 8 * AES_BLOCK_SIZE);
|
||||
|
||||
while ((walk.nbytes / AES_BLOCK_SIZE) >= 8) {
|
||||
kernel_neon_begin();
|
||||
bsaes_cbc_encrypt(walk.src.virt.addr, walk.dst.virt.addr,
|
||||
walk.nbytes, &ctx->dec, walk.iv);
|
||||
kernel_neon_end();
|
||||
err = blkcipher_walk_done(desc, &walk, 0);
|
||||
}
|
||||
while (walk.nbytes) {
|
||||
u32 blocks = walk.nbytes / AES_BLOCK_SIZE;
|
||||
u8 *dst = walk.dst.virt.addr;
|
||||
u8 *src = walk.src.virt.addr;
|
||||
u8 bk[2][AES_BLOCK_SIZE];
|
||||
u8 *iv = walk.iv;
|
||||
|
||||
do {
|
||||
if (walk.dst.virt.addr == walk.src.virt.addr)
|
||||
memcpy(bk[blocks & 1], src, AES_BLOCK_SIZE);
|
||||
|
||||
AES_decrypt(src, dst, &ctx->dec.rk);
|
||||
crypto_xor(dst, iv, AES_BLOCK_SIZE);
|
||||
|
||||
if (walk.dst.virt.addr == walk.src.virt.addr)
|
||||
iv = bk[blocks & 1];
|
||||
else
|
||||
iv = src;
|
||||
|
||||
dst += AES_BLOCK_SIZE;
|
||||
src += AES_BLOCK_SIZE;
|
||||
} while (--blocks);
|
||||
err = blkcipher_walk_done(desc, &walk, 0);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
static void inc_be128_ctr(__be32 ctr[], u32 addend)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 3; i >= 0; i--, addend = 1) {
|
||||
u32 n = be32_to_cpu(ctr[i]) + addend;
|
||||
|
||||
ctr[i] = cpu_to_be32(n);
|
||||
if (n >= addend)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static int aesbs_ctr_encrypt(struct blkcipher_desc *desc,
|
||||
struct scatterlist *dst, struct scatterlist *src,
|
||||
unsigned int nbytes)
|
||||
{
|
||||
struct aesbs_ctr_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
|
||||
struct blkcipher_walk walk;
|
||||
u32 blocks;
|
||||
int err;
|
||||
|
||||
blkcipher_walk_init(&walk, dst, src, nbytes);
|
||||
err = blkcipher_walk_virt_block(desc, &walk, 8 * AES_BLOCK_SIZE);
|
||||
|
||||
while ((blocks = walk.nbytes / AES_BLOCK_SIZE)) {
|
||||
u32 tail = walk.nbytes % AES_BLOCK_SIZE;
|
||||
__be32 *ctr = (__be32 *)walk.iv;
|
||||
u32 headroom = UINT_MAX - be32_to_cpu(ctr[3]);
|
||||
|
||||
/* avoid 32 bit counter overflow in the NEON code */
|
||||
if (unlikely(headroom < blocks)) {
|
||||
blocks = headroom + 1;
|
||||
tail = walk.nbytes - blocks * AES_BLOCK_SIZE;
|
||||
}
|
||||
kernel_neon_begin();
|
||||
bsaes_ctr32_encrypt_blocks(walk.src.virt.addr,
|
||||
walk.dst.virt.addr, blocks,
|
||||
&ctx->enc, walk.iv);
|
||||
kernel_neon_end();
|
||||
inc_be128_ctr(ctr, blocks);
|
||||
|
||||
nbytes -= blocks * AES_BLOCK_SIZE;
|
||||
if (nbytes && nbytes == tail && nbytes <= AES_BLOCK_SIZE)
|
||||
break;
|
||||
|
||||
err = blkcipher_walk_done(desc, &walk, tail);
|
||||
}
|
||||
if (walk.nbytes) {
|
||||
u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
|
||||
u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
|
||||
u8 ks[AES_BLOCK_SIZE];
|
||||
|
||||
AES_encrypt(walk.iv, ks, &ctx->enc.rk);
|
||||
if (tdst != tsrc)
|
||||
memcpy(tdst, tsrc, nbytes);
|
||||
crypto_xor(tdst, ks, nbytes);
|
||||
err = blkcipher_walk_done(desc, &walk, 0);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
static int aesbs_xts_encrypt(struct blkcipher_desc *desc,
|
||||
struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
struct aesbs_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
|
||||
struct blkcipher_walk walk;
|
||||
int err;
|
||||
|
||||
blkcipher_walk_init(&walk, dst, src, nbytes);
|
||||
err = blkcipher_walk_virt_block(desc, &walk, 8 * AES_BLOCK_SIZE);
|
||||
|
||||
/* generate the initial tweak */
|
||||
AES_encrypt(walk.iv, walk.iv, &ctx->twkey);
|
||||
|
||||
while (walk.nbytes) {
|
||||
kernel_neon_begin();
|
||||
bsaes_xts_encrypt(walk.src.virt.addr, walk.dst.virt.addr,
|
||||
walk.nbytes, &ctx->enc, walk.iv);
|
||||
kernel_neon_end();
|
||||
err = blkcipher_walk_done(desc, &walk, 0);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
static int aesbs_xts_decrypt(struct blkcipher_desc *desc,
|
||||
struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
struct aesbs_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
|
||||
struct blkcipher_walk walk;
|
||||
int err;
|
||||
|
||||
blkcipher_walk_init(&walk, dst, src, nbytes);
|
||||
err = blkcipher_walk_virt_block(desc, &walk, 8 * AES_BLOCK_SIZE);
|
||||
|
||||
/* generate the initial tweak */
|
||||
AES_encrypt(walk.iv, walk.iv, &ctx->twkey);
|
||||
|
||||
while (walk.nbytes) {
|
||||
kernel_neon_begin();
|
||||
bsaes_xts_decrypt(walk.src.virt.addr, walk.dst.virt.addr,
|
||||
walk.nbytes, &ctx->dec, walk.iv);
|
||||
kernel_neon_end();
|
||||
err = blkcipher_walk_done(desc, &walk, 0);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
static struct crypto_alg aesbs_algs[] = { {
|
||||
.cra_name = "__cbc-aes-neonbs",
|
||||
.cra_driver_name = "__driver-cbc-aes-neonbs",
|
||||
.cra_priority = 0,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
|
||||
.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct aesbs_cbc_ctx),
|
||||
.cra_alignmask = 7,
|
||||
.cra_type = &crypto_blkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_blkcipher = {
|
||||
.min_keysize = AES_MIN_KEY_SIZE,
|
||||
.max_keysize = AES_MAX_KEY_SIZE,
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
.setkey = aesbs_cbc_set_key,
|
||||
.encrypt = aesbs_cbc_encrypt,
|
||||
.decrypt = aesbs_cbc_decrypt,
|
||||
},
|
||||
}, {
|
||||
.cra_name = "__ctr-aes-neonbs",
|
||||
.cra_driver_name = "__driver-ctr-aes-neonbs",
|
||||
.cra_priority = 0,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
|
||||
.cra_blocksize = 1,
|
||||
.cra_ctxsize = sizeof(struct aesbs_ctr_ctx),
|
||||
.cra_alignmask = 7,
|
||||
.cra_type = &crypto_blkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_blkcipher = {
|
||||
.min_keysize = AES_MIN_KEY_SIZE,
|
||||
.max_keysize = AES_MAX_KEY_SIZE,
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
.setkey = aesbs_ctr_set_key,
|
||||
.encrypt = aesbs_ctr_encrypt,
|
||||
.decrypt = aesbs_ctr_encrypt,
|
||||
},
|
||||
}, {
|
||||
.cra_name = "__xts-aes-neonbs",
|
||||
.cra_driver_name = "__driver-xts-aes-neonbs",
|
||||
.cra_priority = 0,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
|
||||
.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct aesbs_xts_ctx),
|
||||
.cra_alignmask = 7,
|
||||
.cra_type = &crypto_blkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_blkcipher = {
|
||||
.min_keysize = 2 * AES_MIN_KEY_SIZE,
|
||||
.max_keysize = 2 * AES_MAX_KEY_SIZE,
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
.setkey = aesbs_xts_set_key,
|
||||
.encrypt = aesbs_xts_encrypt,
|
||||
.decrypt = aesbs_xts_decrypt,
|
||||
},
|
||||
}, {
|
||||
.cra_name = "cbc(aes)",
|
||||
.cra_driver_name = "cbc-aes-neonbs",
|
||||
.cra_priority = 300,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
|
||||
.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct async_helper_ctx),
|
||||
.cra_alignmask = 7,
|
||||
.cra_type = &crypto_ablkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_init = ablk_init,
|
||||
.cra_exit = ablk_exit,
|
||||
.cra_ablkcipher = {
|
||||
.min_keysize = AES_MIN_KEY_SIZE,
|
||||
.max_keysize = AES_MAX_KEY_SIZE,
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
.setkey = ablk_set_key,
|
||||
.encrypt = __ablk_encrypt,
|
||||
.decrypt = ablk_decrypt,
|
||||
}
|
||||
}, {
|
||||
.cra_name = "ctr(aes)",
|
||||
.cra_driver_name = "ctr-aes-neonbs",
|
||||
.cra_priority = 300,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
|
||||
.cra_blocksize = 1,
|
||||
.cra_ctxsize = sizeof(struct async_helper_ctx),
|
||||
.cra_alignmask = 7,
|
||||
.cra_type = &crypto_ablkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_init = ablk_init,
|
||||
.cra_exit = ablk_exit,
|
||||
.cra_ablkcipher = {
|
||||
.min_keysize = AES_MIN_KEY_SIZE,
|
||||
.max_keysize = AES_MAX_KEY_SIZE,
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
.setkey = ablk_set_key,
|
||||
.encrypt = ablk_encrypt,
|
||||
.decrypt = ablk_decrypt,
|
||||
}
|
||||
}, {
|
||||
.cra_name = "xts(aes)",
|
||||
.cra_driver_name = "xts-aes-neonbs",
|
||||
.cra_priority = 300,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
|
||||
.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct async_helper_ctx),
|
||||
.cra_alignmask = 7,
|
||||
.cra_type = &crypto_ablkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_init = ablk_init,
|
||||
.cra_exit = ablk_exit,
|
||||
.cra_ablkcipher = {
|
||||
.min_keysize = 2 * AES_MIN_KEY_SIZE,
|
||||
.max_keysize = 2 * AES_MAX_KEY_SIZE,
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
.setkey = ablk_set_key,
|
||||
.encrypt = ablk_encrypt,
|
||||
.decrypt = ablk_decrypt,
|
||||
}
|
||||
} };
|
||||
|
||||
static int __init aesbs_mod_init(void)
|
||||
{
|
||||
if (!cpu_has_neon())
|
||||
return -ENODEV;
|
||||
|
||||
return crypto_register_algs(aesbs_algs, ARRAY_SIZE(aesbs_algs));
|
||||
}
|
||||
|
||||
static void __exit aesbs_mod_exit(void)
|
||||
{
|
||||
crypto_unregister_algs(aesbs_algs, ARRAY_SIZE(aesbs_algs));
|
||||
}
|
||||
|
||||
module_init(aesbs_mod_init);
|
||||
module_exit(aesbs_mod_exit);
|
||||
|
||||
MODULE_DESCRIPTION("Bit sliced AES in CBC/CTR/XTS modes using NEON");
|
||||
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
|
||||
MODULE_LICENSE("GPL");
|
||||
arch/arm/crypto/sha1-armv4-large.S (new file, 497 lines)
@@ -0,0 +1,497 @@
|
||||
#define __ARM_ARCH__ __LINUX_ARM_ARCH__
|
||||
@ ====================================================================
|
||||
@ Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
|
||||
@ project. The module is, however, dual licensed under OpenSSL and
|
||||
@ CRYPTOGAMS licenses depending on where you obtain it. For further
|
||||
@ details see http://www.openssl.org/~appro/cryptogams/.
|
||||
@ ====================================================================
|
||||
|
||||
@ sha1_block procedure for ARMv4.
|
||||
@
|
||||
@ January 2007.
|
||||
|
||||
@ Size/performance trade-off
|
||||
@ ====================================================================
|
||||
@ impl size in bytes comp cycles[*] measured performance
|
||||
@ ====================================================================
|
||||
@ thumb 304 3212 4420
|
||||
@ armv4-small 392/+29% 1958/+64% 2250/+96%
|
||||
@ armv4-compact 740/+89% 1552/+26% 1840/+22%
|
||||
@ armv4-large 1420/+92% 1307/+19% 1370/+34%[***]
|
||||
@ full unroll ~5100/+260% ~1260/+4% ~1300/+5%
|
||||
@ ====================================================================
|
||||
@ thumb = same as 'small' but in Thumb instructions[**] and
|
||||
@ with recurring code in two private functions;
|
||||
@ small = detached Xload/update, loops are folded;
|
||||
@ compact = detached Xload/update, 5x unroll;
|
||||
@ large = interleaved Xload/update, 5x unroll;
|
||||
@ full unroll = interleaved Xload/update, full unroll, estimated[!];
|
||||
@
|
||||
@ [*] Manually counted instructions in "grand" loop body. Measured
|
||||
@ performance is affected by prologue and epilogue overhead,
|
||||
@ i-cache availability, branch penalties, etc.
|
||||
@ [**] While each Thumb instruction is twice smaller, they are not as
|
||||
@ diverse as ARM ones: e.g., there are only two arithmetic
|
||||
@ instructions with 3 arguments, no [fixed] rotate, addressing
|
||||
@ modes are limited. As result it takes more instructions to do
|
||||
@ the same job in Thumb, therefore the code is never twice as
|
||||
@ small and always slower.
|
||||
@ [***] which is also ~35% better than compiler generated code. Dual-
|
||||
@ issue Cortex A8 core was measured to process input block in
|
||||
@ ~990 cycles.
|
||||
|
||||
@ August 2010.
|
||||
@
|
||||
@ Rescheduling for dual-issue pipeline resulted in 13% improvement on
|
||||
@ Cortex A8 core and in absolute terms ~870 cycles per input block
|
||||
@ [or 13.6 cycles per byte].
|
||||
|
||||
@ February 2011.
|
||||
@
|
||||
@ Profiler-assisted and platform-specific optimization resulted in 10%
|
||||
@ improvement on Cortex A8 core and 12.2 cycles per byte.
|
||||
|
||||
#include <linux/linkage.h>
|
||||
|
||||
.text
|
||||
|
||||
.align 2
|
||||
ENTRY(sha1_block_data_order)
|
||||
stmdb sp!,{r4-r12,lr}
|
||||
add r2,r1,r2,lsl#6 @ r2 to point at the end of r1
|
||||
ldmia r0,{r3,r4,r5,r6,r7}
|
||||
.Lloop:
|
||||
ldr r8,.LK_00_19
|
||||
mov r14,sp
|
||||
sub sp,sp,#15*4
|
||||
mov r5,r5,ror#30
|
||||
mov r6,r6,ror#30
|
||||
mov r7,r7,ror#30 @ [6]
|
||||
.L_00_15:
|
||||
#if __ARM_ARCH__<7
|
||||
ldrb r10,[r1,#2]
|
||||
ldrb r9,[r1,#3]
|
||||
ldrb r11,[r1,#1]
|
||||
add r7,r8,r7,ror#2 @ E+=K_00_19
|
||||
ldrb r12,[r1],#4
|
||||
orr r9,r9,r10,lsl#8
|
||||
eor r10,r5,r6 @ F_xx_xx
|
||||
orr r9,r9,r11,lsl#16
|
||||
add r7,r7,r3,ror#27 @ E+=ROR(A,27)
|
||||
orr r9,r9,r12,lsl#24
|
||||
#else
|
||||
ldr r9,[r1],#4 @ handles unaligned
|
||||
add r7,r8,r7,ror#2 @ E+=K_00_19
|
||||
eor r10,r5,r6 @ F_xx_xx
|
||||
add r7,r7,r3,ror#27 @ E+=ROR(A,27)
|
||||
#ifdef __ARMEL__
|
||||
rev r9,r9 @ byte swap
|
||||
#endif
|
||||
#endif
|
||||
and r10,r4,r10,ror#2
|
||||
add r7,r7,r9 @ E+=X[i]
|
||||
eor r10,r10,r6,ror#2 @ F_00_19(B,C,D)
|
||||
str r9,[r14,#-4]!
|
||||
add r7,r7,r10 @ E+=F_00_19(B,C,D)
|
||||
#if __ARM_ARCH__<7
|
||||
ldrb r10,[r1,#2]
|
||||
ldrb r9,[r1,#3]
|
||||
ldrb r11,[r1,#1]
|
||||
add r6,r8,r6,ror#2 @ E+=K_00_19
|
||||
ldrb r12,[r1],#4
|
||||
orr r9,r9,r10,lsl#8
|
||||
eor r10,r4,r5 @ F_xx_xx
|
||||
orr r9,r9,r11,lsl#16
|
||||
add r6,r6,r7,ror#27 @ E+=ROR(A,27)
|
||||
orr r9,r9,r12,lsl#24
|
||||
#else
|
||||
ldr r9,[r1],#4 @ handles unaligned
|
||||
add r6,r8,r6,ror#2 @ E+=K_00_19
|
||||
eor r10,r4,r5 @ F_xx_xx
|
||||
add r6,r6,r7,ror#27 @ E+=ROR(A,27)
|
||||
#ifdef __ARMEL__
|
||||
rev r9,r9 @ byte swap
|
||||
#endif
|
||||
#endif
|
||||
and r10,r3,r10,ror#2
|
||||
add r6,r6,r9 @ E+=X[i]
|
||||
eor r10,r10,r5,ror#2 @ F_00_19(B,C,D)
|
||||
str r9,[r14,#-4]!
|
||||
add r6,r6,r10 @ E+=F_00_19(B,C,D)
|
||||
#if __ARM_ARCH__<7
|
||||
ldrb r10,[r1,#2]
|
||||
ldrb r9,[r1,#3]
|
||||
ldrb r11,[r1,#1]
|
||||
add r5,r8,r5,ror#2 @ E+=K_00_19
|
||||
ldrb r12,[r1],#4
|
||||
orr r9,r9,r10,lsl#8
|
||||
eor r10,r3,r4 @ F_xx_xx
|
||||
orr r9,r9,r11,lsl#16
|
||||
add r5,r5,r6,ror#27 @ E+=ROR(A,27)
|
||||
orr r9,r9,r12,lsl#24
|
||||
#else
|
||||
ldr r9,[r1],#4 @ handles unaligned
|
||||
add r5,r8,r5,ror#2 @ E+=K_00_19
|
||||
eor r10,r3,r4 @ F_xx_xx
|
||||
add r5,r5,r6,ror#27 @ E+=ROR(A,27)
|
||||
#ifdef __ARMEL__
|
||||
rev r9,r9 @ byte swap
|
||||
#endif
|
||||
#endif
|
||||
and r10,r7,r10,ror#2
|
||||
add r5,r5,r9 @ E+=X[i]
|
||||
eor r10,r10,r4,ror#2 @ F_00_19(B,C,D)
|
||||
str r9,[r14,#-4]!
|
||||
add r5,r5,r10 @ E+=F_00_19(B,C,D)
|
||||
#if __ARM_ARCH__<7
|
||||
ldrb r10,[r1,#2]
|
||||
ldrb r9,[r1,#3]
|
||||
ldrb r11,[r1,#1]
|
||||
add r4,r8,r4,ror#2 @ E+=K_00_19
|
||||
ldrb r12,[r1],#4
|
||||
orr r9,r9,r10,lsl#8
|
||||
eor r10,r7,r3 @ F_xx_xx
|
||||
orr r9,r9,r11,lsl#16
|
||||
add r4,r4,r5,ror#27 @ E+=ROR(A,27)
|
||||
orr r9,r9,r12,lsl#24
|
||||
#else
|
||||
ldr r9,[r1],#4 @ handles unaligned
|
||||
add r4,r8,r4,ror#2 @ E+=K_00_19
|
||||
eor r10,r7,r3 @ F_xx_xx
|
||||
add r4,r4,r5,ror#27 @ E+=ROR(A,27)
|
||||
#ifdef __ARMEL__
|
||||
rev r9,r9 @ byte swap
|
||||
#endif
|
||||
#endif
|
||||
and r10,r6,r10,ror#2
|
||||
add r4,r4,r9 @ E+=X[i]
|
||||
eor r10,r10,r3,ror#2 @ F_00_19(B,C,D)
|
||||
str r9,[r14,#-4]!
|
||||
add r4,r4,r10 @ E+=F_00_19(B,C,D)
|
||||
#if __ARM_ARCH__<7
|
||||
ldrb r10,[r1,#2]
|
||||
ldrb r9,[r1,#3]
|
||||
ldrb r11,[r1,#1]
|
||||
add r3,r8,r3,ror#2 @ E+=K_00_19
|
||||
ldrb r12,[r1],#4
|
||||
orr r9,r9,r10,lsl#8
|
||||
eor r10,r6,r7 @ F_xx_xx
|
||||
orr r9,r9,r11,lsl#16
|
||||
add r3,r3,r4,ror#27 @ E+=ROR(A,27)
|
||||
orr r9,r9,r12,lsl#24
|
||||
#else
|
||||
ldr r9,[r1],#4 @ handles unaligned
|
||||
add r3,r8,r3,ror#2 @ E+=K_00_19
|
||||
eor r10,r6,r7 @ F_xx_xx
|
||||
add r3,r3,r4,ror#27 @ E+=ROR(A,27)
|
||||
#ifdef __ARMEL__
|
||||
rev r9,r9 @ byte swap
|
||||
#endif
|
||||
#endif
|
||||
and r10,r5,r10,ror#2
|
||||
add r3,r3,r9 @ E+=X[i]
|
||||
eor r10,r10,r7,ror#2 @ F_00_19(B,C,D)
|
||||
str r9,[r14,#-4]!
|
||||
add r3,r3,r10 @ E+=F_00_19(B,C,D)
|
||||
cmp r14,sp
|
||||
bne .L_00_15 @ [((11+4)*5+2)*3]
|
||||
sub sp,sp,#25*4
|
||||
#if __ARM_ARCH__<7
|
||||
ldrb r10,[r1,#2]
|
||||
ldrb r9,[r1,#3]
|
||||
ldrb r11,[r1,#1]
|
||||
add r7,r8,r7,ror#2 @ E+=K_00_19
|
||||
ldrb r12,[r1],#4
|
||||
orr r9,r9,r10,lsl#8
|
||||
eor r10,r5,r6 @ F_xx_xx
|
||||
orr r9,r9,r11,lsl#16
|
||||
add r7,r7,r3,ror#27 @ E+=ROR(A,27)
|
||||
orr r9,r9,r12,lsl#24
|
||||
#else
|
||||
ldr r9,[r1],#4 @ handles unaligned
|
||||
add r7,r8,r7,ror#2 @ E+=K_00_19
|
||||
eor r10,r5,r6 @ F_xx_xx
|
||||
add r7,r7,r3,ror#27 @ E+=ROR(A,27)
|
||||
#ifdef __ARMEL__
|
||||
rev r9,r9 @ byte swap
|
||||
#endif
|
||||
#endif
|
||||
and r10,r4,r10,ror#2
|
||||
add r7,r7,r9 @ E+=X[i]
|
||||
eor r10,r10,r6,ror#2 @ F_00_19(B,C,D)
|
||||
str r9,[r14,#-4]!
|
||||
add r7,r7,r10 @ E+=F_00_19(B,C,D)
|
||||
ldr r9,[r14,#15*4]
|
||||
ldr r10,[r14,#13*4]
|
||||
ldr r11,[r14,#7*4]
|
||||
add r6,r8,r6,ror#2 @ E+=K_xx_xx
|
||||
ldr r12,[r14,#2*4]
|
||||
eor r9,r9,r10
|
||||
eor r11,r11,r12 @ 1 cycle stall
|
||||
eor r10,r4,r5 @ F_xx_xx
|
||||
mov r9,r9,ror#31
|
||||
add r6,r6,r7,ror#27 @ E+=ROR(A,27)
|
||||
eor r9,r9,r11,ror#31
|
||||
str r9,[r14,#-4]!
|
||||
and r10,r3,r10,ror#2 @ F_xx_xx
|
||||
@ F_xx_xx
|
||||
add r6,r6,r9 @ E+=X[i]
|
||||
eor r10,r10,r5,ror#2 @ F_00_19(B,C,D)
|
||||
add r6,r6,r10 @ E+=F_00_19(B,C,D)
|
||||
ldr r9,[r14,#15*4]
|
||||
ldr r10,[r14,#13*4]
|
||||
ldr r11,[r14,#7*4]
|
||||
add r5,r8,r5,ror#2 @ E+=K_xx_xx
|
||||
ldr r12,[r14,#2*4]
|
||||
eor r9,r9,r10
|
||||
eor r11,r11,r12 @ 1 cycle stall
|
||||
eor r10,r3,r4 @ F_xx_xx
|
||||
mov r9,r9,ror#31
|
||||
add r5,r5,r6,ror#27 @ E+=ROR(A,27)
|
||||
eor r9,r9,r11,ror#31
|
||||
str r9,[r14,#-4]!
|
||||
and r10,r7,r10,ror#2 @ F_xx_xx
|
||||
@ F_xx_xx
|
||||
add r5,r5,r9 @ E+=X[i]
|
||||
eor r10,r10,r4,ror#2 @ F_00_19(B,C,D)
|
||||
add r5,r5,r10 @ E+=F_00_19(B,C,D)
|
||||
ldr r9,[r14,#15*4]
|
||||
ldr r10,[r14,#13*4]
|
||||
ldr r11,[r14,#7*4]
|
||||
add r4,r8,r4,ror#2 @ E+=K_xx_xx
|
||||
ldr r12,[r14,#2*4]
|
||||
eor r9,r9,r10
|
||||
eor r11,r11,r12 @ 1 cycle stall
|
||||
eor r10,r7,r3 @ F_xx_xx
|
||||
mov r9,r9,ror#31
|
||||
add r4,r4,r5,ror#27 @ E+=ROR(A,27)
|
||||
eor r9,r9,r11,ror#31
|
||||
str r9,[r14,#-4]!
|
||||
and r10,r6,r10,ror#2 @ F_xx_xx
|
||||
@ F_xx_xx
|
||||
add r4,r4,r9 @ E+=X[i]
|
||||
eor r10,r10,r3,ror#2 @ F_00_19(B,C,D)
|
||||
add r4,r4,r10 @ E+=F_00_19(B,C,D)
|
||||
ldr r9,[r14,#15*4]
|
||||
ldr r10,[r14,#13*4]
|
||||
ldr r11,[r14,#7*4]
|
||||
add r3,r8,r3,ror#2 @ E+=K_xx_xx
|
||||
ldr r12,[r14,#2*4]
|
||||
eor r9,r9,r10
|
||||
eor r11,r11,r12 @ 1 cycle stall
|
||||
eor r10,r6,r7 @ F_xx_xx
|
||||
mov r9,r9,ror#31
|
||||
add r3,r3,r4,ror#27 @ E+=ROR(A,27)
|
||||
eor r9,r9,r11,ror#31
|
||||
str r9,[r14,#-4]!
|
||||
and r10,r5,r10,ror#2 @ F_xx_xx
|
||||
@ F_xx_xx
|
||||
add r3,r3,r9 @ E+=X[i]
|
||||
eor r10,r10,r7,ror#2 @ F_00_19(B,C,D)
|
||||
add r3,r3,r10 @ E+=F_00_19(B,C,D)
|
||||
|
||||
ldr r8,.LK_20_39 @ [+15+16*4]
|
||||
cmn sp,#0 @ [+3], clear carry to denote 20_39
|
||||
.L_20_39_or_60_79:
|
||||
ldr r9,[r14,#15*4]
|
||||
ldr r10,[r14,#13*4]
|
||||
ldr r11,[r14,#7*4]
|
||||
add r7,r8,r7,ror#2 @ E+=K_xx_xx
|
||||
ldr r12,[r14,#2*4]
|
||||
eor r9,r9,r10
|
||||
eor r11,r11,r12 @ 1 cycle stall
|
||||
eor r10,r5,r6 @ F_xx_xx
|
||||
mov r9,r9,ror#31
|
||||
add r7,r7,r3,ror#27 @ E+=ROR(A,27)
|
||||
eor r9,r9,r11,ror#31
|
||||
str r9,[r14,#-4]!
|
||||
eor r10,r4,r10,ror#2 @ F_xx_xx
|
||||
@ F_xx_xx
|
||||
add r7,r7,r9 @ E+=X[i]
|
||||
add r7,r7,r10 @ E+=F_20_39(B,C,D)
|
||||
ldr r9,[r14,#15*4]
|
||||
ldr r10,[r14,#13*4]
|
||||
ldr r11,[r14,#7*4]
|
||||
add r6,r8,r6,ror#2 @ E+=K_xx_xx
|
||||
ldr r12,[r14,#2*4]
|
||||
eor r9,r9,r10
|
||||
eor r11,r11,r12 @ 1 cycle stall
|
||||
eor r10,r4,r5 @ F_xx_xx
|
||||
mov r9,r9,ror#31
|
||||
add r6,r6,r7,ror#27 @ E+=ROR(A,27)
|
||||
eor r9,r9,r11,ror#31
|
||||
str r9,[r14,#-4]!
|
||||
eor r10,r3,r10,ror#2 @ F_xx_xx
|
||||
@ F_xx_xx
|
||||
add r6,r6,r9 @ E+=X[i]
|
||||
add r6,r6,r10 @ E+=F_20_39(B,C,D)
|
||||
ldr r9,[r14,#15*4]
|
||||
ldr r10,[r14,#13*4]
|
||||
ldr r11,[r14,#7*4]
|
||||
add r5,r8,r5,ror#2 @ E+=K_xx_xx
|
||||
ldr r12,[r14,#2*4]
|
||||
eor r9,r9,r10
|
||||
eor r11,r11,r12 @ 1 cycle stall
|
||||
eor r10,r3,r4 @ F_xx_xx
|
||||
mov r9,r9,ror#31
|
||||
add r5,r5,r6,ror#27 @ E+=ROR(A,27)
|
||||
eor r9,r9,r11,ror#31
|
||||
str r9,[r14,#-4]!
|
||||
eor r10,r7,r10,ror#2 @ F_xx_xx
|
||||
@ F_xx_xx
|
||||
add r5,r5,r9 @ E+=X[i]
|
||||
add r5,r5,r10 @ E+=F_20_39(B,C,D)
|
||||
ldr r9,[r14,#15*4]
|
||||
ldr r10,[r14,#13*4]
|
||||
ldr r11,[r14,#7*4]
|
||||
add r4,r8,r4,ror#2 @ E+=K_xx_xx
|
||||
ldr r12,[r14,#2*4]
|
||||
eor r9,r9,r10
|
||||
eor r11,r11,r12 @ 1 cycle stall
|
||||
eor r10,r7,r3 @ F_xx_xx
|
||||
mov r9,r9,ror#31
|
||||
add r4,r4,r5,ror#27 @ E+=ROR(A,27)
|
||||
eor r9,r9,r11,ror#31
|
||||
str r9,[r14,#-4]!
|
||||
eor r10,r6,r10,ror#2 @ F_xx_xx
|
||||
@ F_xx_xx
|
||||
add r4,r4,r9 @ E+=X[i]
|
||||
add r4,r4,r10 @ E+=F_20_39(B,C,D)
|
||||
ldr r9,[r14,#15*4]
|
||||
ldr r10,[r14,#13*4]
|
||||
ldr r11,[r14,#7*4]
|
||||
add r3,r8,r3,ror#2 @ E+=K_xx_xx
|
||||
ldr r12,[r14,#2*4]
|
||||
eor r9,r9,r10
|
||||
eor r11,r11,r12 @ 1 cycle stall
|
||||
eor r10,r6,r7 @ F_xx_xx
|
||||
mov r9,r9,ror#31
|
||||
add r3,r3,r4,ror#27 @ E+=ROR(A,27)
|
||||
eor r9,r9,r11,ror#31
|
||||
str r9,[r14,#-4]!
|
||||
eor r10,r5,r10,ror#2 @ F_xx_xx
|
||||
@ F_xx_xx
|
||||
add r3,r3,r9 @ E+=X[i]
|
||||
add r3,r3,r10 @ E+=F_20_39(B,C,D)
|
||||
ARM( teq r14,sp ) @ preserve carry
|
||||
THUMB( mov r11,sp )
|
||||
THUMB( teq r14,r11 ) @ preserve carry
|
||||
bne .L_20_39_or_60_79 @ [+((12+3)*5+2)*4]
|
||||
bcs .L_done @ [+((12+3)*5+2)*4], spare 300 bytes
|
||||
|
||||
ldr r8,.LK_40_59
|
||||
sub sp,sp,#20*4 @ [+2]
|
||||
.L_40_59:
|
||||
ldr r9,[r14,#15*4]
|
||||
ldr r10,[r14,#13*4]
|
||||
ldr r11,[r14,#7*4]
|
||||
add r7,r8,r7,ror#2 @ E+=K_xx_xx
|
||||
ldr r12,[r14,#2*4]
|
||||
eor r9,r9,r10
|
||||
eor r11,r11,r12 @ 1 cycle stall
|
||||
eor r10,r5,r6 @ F_xx_xx
|
||||
mov r9,r9,ror#31
|
||||
add r7,r7,r3,ror#27 @ E+=ROR(A,27)
|
||||
eor r9,r9,r11,ror#31
|
||||
str r9,[r14,#-4]!
|
||||
and r10,r4,r10,ror#2 @ F_xx_xx
|
||||
and r11,r5,r6 @ F_xx_xx
|
||||
add r7,r7,r9 @ E+=X[i]
|
||||
add r7,r7,r10 @ E+=F_40_59(B,C,D)
|
||||
add r7,r7,r11,ror#2
|
||||
ldr r9,[r14,#15*4]
|
||||
ldr r10,[r14,#13*4]
|
||||
ldr r11,[r14,#7*4]
|
||||
add r6,r8,r6,ror#2 @ E+=K_xx_xx
|
||||
ldr r12,[r14,#2*4]
|
||||
eor r9,r9,r10
|
||||
eor r11,r11,r12 @ 1 cycle stall
|
||||
eor r10,r4,r5 @ F_xx_xx
|
||||
mov r9,r9,ror#31
|
||||
add r6,r6,r7,ror#27 @ E+=ROR(A,27)
|
||||
eor r9,r9,r11,ror#31
|
||||
str r9,[r14,#-4]!
|
||||
and r10,r3,r10,ror#2 @ F_xx_xx
|
||||
and r11,r4,r5 @ F_xx_xx
|
||||
add r6,r6,r9 @ E+=X[i]
|
||||
add r6,r6,r10 @ E+=F_40_59(B,C,D)
|
||||
add r6,r6,r11,ror#2
|
||||
ldr r9,[r14,#15*4]
|
||||
ldr r10,[r14,#13*4]
|
||||
ldr r11,[r14,#7*4]
|
||||
add r5,r8,r5,ror#2 @ E+=K_xx_xx
|
||||
ldr r12,[r14,#2*4]
|
||||
eor r9,r9,r10
|
||||
eor r11,r11,r12 @ 1 cycle stall
|
||||
eor r10,r3,r4 @ F_xx_xx
|
||||
mov r9,r9,ror#31
|
||||
add r5,r5,r6,ror#27 @ E+=ROR(A,27)
|
||||
eor r9,r9,r11,ror#31
|
||||
str r9,[r14,#-4]!
|
||||
and r10,r7,r10,ror#2 @ F_xx_xx
|
||||
and r11,r3,r4 @ F_xx_xx
|
||||
add r5,r5,r9 @ E+=X[i]
|
||||
add r5,r5,r10 @ E+=F_40_59(B,C,D)
|
||||
add r5,r5,r11,ror#2
|
||||
ldr r9,[r14,#15*4]
|
||||
ldr r10,[r14,#13*4]
|
||||
ldr r11,[r14,#7*4]
|
||||
add r4,r8,r4,ror#2 @ E+=K_xx_xx
|
||||
ldr r12,[r14,#2*4]
|
||||
eor r9,r9,r10
|
||||
eor r11,r11,r12 @ 1 cycle stall
|
||||
eor r10,r7,r3 @ F_xx_xx
|
||||
mov r9,r9,ror#31
|
||||
add r4,r4,r5,ror#27 @ E+=ROR(A,27)
|
||||
eor r9,r9,r11,ror#31
|
||||
str r9,[r14,#-4]!
|
||||
and r10,r6,r10,ror#2 @ F_xx_xx
|
||||
and r11,r7,r3 @ F_xx_xx
|
||||
add r4,r4,r9 @ E+=X[i]
|
||||
add r4,r4,r10 @ E+=F_40_59(B,C,D)
|
||||
add r4,r4,r11,ror#2
|
||||
ldr r9,[r14,#15*4]
|
||||
ldr r10,[r14,#13*4]
|
||||
ldr r11,[r14,#7*4]
|
||||
add r3,r8,r3,ror#2 @ E+=K_xx_xx
|
||||
ldr r12,[r14,#2*4]
|
||||
eor r9,r9,r10
|
||||
eor r11,r11,r12 @ 1 cycle stall
|
||||
eor r10,r6,r7 @ F_xx_xx
|
||||
mov r9,r9,ror#31
|
||||
add r3,r3,r4,ror#27 @ E+=ROR(A,27)
|
||||
eor r9,r9,r11,ror#31
|
||||
str r9,[r14,#-4]!
|
||||
and r10,r5,r10,ror#2 @ F_xx_xx
|
||||
and r11,r6,r7 @ F_xx_xx
|
||||
add r3,r3,r9 @ E+=X[i]
|
||||
add r3,r3,r10 @ E+=F_40_59(B,C,D)
|
||||
add r3,r3,r11,ror#2
|
||||
cmp r14,sp
|
||||
bne .L_40_59 @ [+((12+5)*5+2)*4]
|
||||
|
||||
ldr r8,.LK_60_79
|
||||
sub sp,sp,#20*4
|
||||
cmp sp,#0 @ set carry to denote 60_79
|
||||
b .L_20_39_or_60_79 @ [+4], spare 300 bytes
|
||||
.L_done:
|
||||
add sp,sp,#80*4 @ "deallocate" stack frame
|
||||
ldmia r0,{r8,r9,r10,r11,r12}
|
||||
add r3,r8,r3
|
||||
add r4,r9,r4
|
||||
add r5,r10,r5,ror#2
|
||||
add r6,r11,r6,ror#2
|
||||
add r7,r12,r7,ror#2
|
||||
stmia r0,{r3,r4,r5,r6,r7}
|
||||
teq r1,r2
|
||||
bne .Lloop @ [+18], total 1307
|
||||
|
||||
ldmia sp!,{r4-r12,pc}
|
||||
.align 2
|
||||
.LK_00_19: .word 0x5a827999
|
||||
.LK_20_39: .word 0x6ed9eba1
|
||||
.LK_40_59: .word 0x8f1bbcdc
|
||||
.LK_60_79: .word 0xca62c1d6
|
||||
ENDPROC(sha1_block_data_order)
|
||||
.asciz "SHA1 block transform for ARMv4, CRYPTOGAMS by <appro@openssl.org>"
|
||||
.align 2
|
||||
arch/arm/crypto/sha1_glue.c (new file, 179 lines)
@@ -0,0 +1,179 @@
|
||||
/*
|
||||
* Cryptographic API.
|
||||
* Glue code for the SHA1 Secure Hash Algorithm assembler implementation
|
||||
*
|
||||
* This file is based on sha1_generic.c and sha1_ssse3_glue.c
|
||||
*
|
||||
* Copyright (c) Alan Smithee.
|
||||
* Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
|
||||
* Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
|
||||
* Copyright (c) Mathias Krause <minipli@googlemail.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation; either version 2 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <crypto/internal/hash.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/cryptohash.h>
|
||||
#include <linux/types.h>
|
||||
#include <crypto/sha.h>
|
||||
#include <asm/byteorder.h>
|
||||
|
||||
struct SHA1_CTX {
|
||||
uint32_t h0,h1,h2,h3,h4;
|
||||
u64 count;
|
||||
u8 data[SHA1_BLOCK_SIZE];
|
||||
};
|
||||
|
||||
asmlinkage void sha1_block_data_order(struct SHA1_CTX *digest,
|
||||
const unsigned char *data, unsigned int rounds);
|
||||
|
||||
|
||||
static int sha1_init(struct shash_desc *desc)
|
||||
{
|
||||
struct SHA1_CTX *sctx = shash_desc_ctx(desc);
|
||||
memset(sctx, 0, sizeof(*sctx));
|
||||
sctx->h0 = SHA1_H0;
|
||||
sctx->h1 = SHA1_H1;
|
||||
sctx->h2 = SHA1_H2;
|
||||
sctx->h3 = SHA1_H3;
|
||||
sctx->h4 = SHA1_H4;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int __sha1_update(struct SHA1_CTX *sctx, const u8 *data,
|
||||
unsigned int len, unsigned int partial)
|
||||
{
|
||||
unsigned int done = 0;
|
||||
|
||||
sctx->count += len;
|
||||
|
||||
if (partial) {
|
||||
done = SHA1_BLOCK_SIZE - partial;
|
||||
memcpy(sctx->data + partial, data, done);
|
||||
sha1_block_data_order(sctx, sctx->data, 1);
|
||||
}
|
||||
|
||||
if (len - done >= SHA1_BLOCK_SIZE) {
|
||||
const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE;
|
||||
sha1_block_data_order(sctx, data + done, rounds);
|
||||
done += rounds * SHA1_BLOCK_SIZE;
|
||||
}
|
||||
|
||||
memcpy(sctx->data, data + done, len - done);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int sha1_update(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int len)
|
||||
{
|
||||
struct SHA1_CTX *sctx = shash_desc_ctx(desc);
|
||||
unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
|
||||
int res;
|
||||
|
||||
/* Handle the fast case right here */
|
||||
if (partial + len < SHA1_BLOCK_SIZE) {
|
||||
sctx->count += len;
|
||||
memcpy(sctx->data + partial, data, len);
|
||||
return 0;
|
||||
}
|
||||
res = __sha1_update(sctx, data, len, partial);
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
/* Add padding and return the message digest. */
|
||||
static int sha1_final(struct shash_desc *desc, u8 *out)
|
||||
{
|
||||
struct SHA1_CTX *sctx = shash_desc_ctx(desc);
|
||||
unsigned int i, index, padlen;
|
||||
__be32 *dst = (__be32 *)out;
|
||||
__be64 bits;
|
||||
static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
|
||||
|
||||
bits = cpu_to_be64(sctx->count << 3);
|
||||
|
||||
/* Pad out to 56 mod 64 and append length */
|
||||
index = sctx->count % SHA1_BLOCK_SIZE;
|
||||
padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index);
|
||||
/* We need to fill a whole block for __sha1_update() */
|
||||
if (padlen <= 56) {
|
||||
sctx->count += padlen;
|
||||
memcpy(sctx->data + index, padding, padlen);
|
||||
} else {
|
||||
__sha1_update(sctx, padding, padlen, index);
|
||||
}
|
||||
__sha1_update(sctx, (const u8 *)&bits, sizeof(bits), 56);
|
||||
|
||||
/* Store state in digest */
|
||||
for (i = 0; i < 5; i++)
|
||||
dst[i] = cpu_to_be32(((u32 *)sctx)[i]);
|
||||
|
||||
/* Wipe context */
|
||||
memset(sctx, 0, sizeof(*sctx));
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int sha1_export(struct shash_desc *desc, void *out)
|
||||
{
|
||||
struct SHA1_CTX *sctx = shash_desc_ctx(desc);
|
||||
memcpy(out, sctx, sizeof(*sctx));
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int sha1_import(struct shash_desc *desc, const void *in)
|
||||
{
|
||||
struct SHA1_CTX *sctx = shash_desc_ctx(desc);
|
||||
memcpy(sctx, in, sizeof(*sctx));
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static struct shash_alg alg = {
|
||||
.digestsize = SHA1_DIGEST_SIZE,
|
||||
.init = sha1_init,
|
||||
.update = sha1_update,
|
||||
.final = sha1_final,
|
||||
.export = sha1_export,
|
||||
.import = sha1_import,
|
||||
.descsize = sizeof(struct SHA1_CTX),
|
||||
.statesize = sizeof(struct SHA1_CTX),
|
||||
.base = {
|
||||
.cra_name = "sha1",
|
||||
.cra_driver_name= "sha1-asm",
|
||||
.cra_priority = 150,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
|
||||
.cra_blocksize = SHA1_BLOCK_SIZE,
|
||||
.cra_module = THIS_MODULE,
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
static int __init sha1_mod_init(void)
|
||||
{
|
||||
return crypto_register_shash(&alg);
|
||||
}
|
||||
|
||||
|
||||
static void __exit sha1_mod_fini(void)
|
||||
{
|
||||
crypto_unregister_shash(&alg);
|
||||
}
|
||||
|
||||
|
||||
module_init(sha1_mod_init);
|
||||
module_exit(sha1_mod_fini);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm (ARM)");
|
||||
MODULE_ALIAS("sha1");
|
||||
MODULE_AUTHOR("David McCullough <ucdevel@gmail.com>");
|
||||
arch/arm/include/asm/neon.h (new file, 36 lines)
@@ -0,0 +1,36 @@
/*
 * linux/arch/arm/include/asm/neon.h
 *
 * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <asm/hwcap.h>

#define cpu_has_neon()	(!!(elf_hwcap & HWCAP_NEON))

#ifdef __ARM_NEON__

/*
 * If you are affected by the BUILD_BUG below, it probably means that you are
 * using NEON code /and/ calling the kernel_neon_begin() function from the same
 * compilation unit. To prevent issues that may arise from GCC reordering or
 * generating(1) NEON instructions outside of these begin/end functions, the
 * only supported way of using NEON code in the kernel is by isolating it in a
 * separate compilation unit, and calling it from another unit from inside a
 * kernel_neon_begin/kernel_neon_end pair.
 *
 * (1) Current GCC (4.7) might generate NEON instructions at O3 level if
 * -mfpu=neon is set.
 */

#define kernel_neon_begin() \
	BUILD_BUG_ON_MSG(1, "kernel_neon_begin() called from NEON code")

#else
void kernel_neon_begin(void);
#endif
void kernel_neon_end(void);
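Usage sketch (not part of this commit): the comment above requires the NEON code itself to live in its own compilation unit, built with NEON enabled, while the caller uses the kernel_neon_begin()/kernel_neon_end() pair added by this patch. The file and function names (my_neon_core.c, my_neon_transform, my_do_transform) are hypothetical and only illustrate the calling convention.

/* my_neon_core.c: built with -mfpu=neon, contains nothing but the NEON routine */
void my_neon_transform(u8 *dst, const u8 *src, int blocks);

/* caller, in a separate compilation unit built without NEON */
#include <asm/neon.h>

static void my_do_transform(u8 *dst, const u8 *src, int blocks)
{
	kernel_neon_begin();	/* enable VFP/NEON and save any userland register state */
	my_neon_transform(dst, src, blocks);
	kernel_neon_end();	/* disable the VFP/NEON unit again */
}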
arch/arm/include/asm/simd.h (new file, 14 lines)
@@ -0,0 +1,14 @@

#include <linux/hardirq.h>

/*
 * may_use_simd - whether it is allowable at this time to issue SIMD
 * instructions or access the SIMD register file
 *
 * As architectures typically don't preserve the SIMD register file when
 * taking an interrupt, !in_interrupt() should be a reasonable default.
 */
static __must_check inline bool may_use_simd(void)
{
	return !in_interrupt();
}
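Usage sketch (not part of this commit): may_use_simd() is the guard a caller is expected to check before entering a kernel_neon_begin()/kernel_neon_end() section, since kernel-mode NEON is not allowed in interrupt context; crypto/ablk_helper.c later in this commit uses it to fall back to the asynchronous cryptd path instead. The function names other than may_use_simd() and kernel_neon_begin()/end() are hypothetical.

#include <asm/neon.h>
#include <asm/simd.h>

static void my_transform(u8 *dst, const u8 *src, int blocks)
{
	if (may_use_simd()) {
		/* process context: NEON register state can be saved and restored */
		kernel_neon_begin();
		my_neon_transform(dst, src, blocks);
		kernel_neon_end();
	} else {
		/* interrupt context: stay off the NEON unit, use a scalar fallback */
		my_scalar_transform(dst, src, blocks);
	}
}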
@@ -20,6 +20,7 @@
|
||||
#include <linux/init.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/user.h>
|
||||
#include <linux/export.h>
|
||||
|
||||
#include <asm/cp15.h>
|
||||
#include <asm/cputype.h>
|
||||
@@ -648,6 +649,52 @@ static int vfp_hotplug(struct notifier_block *b, unsigned long action,
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_KERNEL_MODE_NEON
|
||||
|
||||
/*
|
||||
* Kernel-side NEON support functions
|
||||
*/
|
||||
void kernel_neon_begin(void)
|
||||
{
|
||||
struct thread_info *thread = current_thread_info();
|
||||
unsigned int cpu;
|
||||
u32 fpexc;
|
||||
|
||||
/*
|
||||
* Kernel mode NEON is only allowed outside of interrupt context
|
||||
* with preemption disabled. This will make sure that the kernel
|
||||
* mode NEON register contents never need to be preserved.
|
||||
*/
|
||||
BUG_ON(in_interrupt());
|
||||
cpu = get_cpu();
|
||||
|
||||
fpexc = fmrx(FPEXC) | FPEXC_EN;
|
||||
fmxr(FPEXC, fpexc);
|
||||
|
||||
/*
|
||||
* Save the userland NEON/VFP state. Under UP,
|
||||
* the owner could be a task other than 'current'
|
||||
*/
|
||||
if (vfp_state_in_hw(cpu, thread))
|
||||
vfp_save_state(&thread->vfpstate, fpexc);
|
||||
#ifndef CONFIG_SMP
|
||||
else if (vfp_current_hw_state[cpu] != NULL)
|
||||
vfp_save_state(vfp_current_hw_state[cpu], fpexc);
|
||||
#endif
|
||||
vfp_current_hw_state[cpu] = NULL;
|
||||
}
|
||||
EXPORT_SYMBOL(kernel_neon_begin);
|
||||
|
||||
void kernel_neon_end(void)
|
||||
{
|
||||
/* Disable the NEON/VFP unit. */
|
||||
fmxr(FPEXC, fmrx(FPEXC) & ~FPEXC_EN);
|
||||
put_cpu();
|
||||
}
|
||||
EXPORT_SYMBOL(kernel_neon_end);
|
||||
|
||||
#endif /* CONFIG_KERNEL_MODE_NEON */
|
||||
|
||||
/*
|
||||
* VFP support code initialisation.
|
||||
*/
|
||||
|
||||
@@ -174,6 +174,10 @@ config CRYPTO_TEST
	help
	  Quick & dirty crypto test module.

config CRYPTO_ABLK_HELPER
	tristate
	select CRYPTO_CRYPTD

comment "Authenticated Encryption with Associated Data"

config CRYPTO_CCM
@@ -423,6 +427,15 @@ config CRYPTO_SHA1_SSSE3
	  using Supplemental SSE3 (SSSE3) instructions or Advanced Vector
	  Extensions (AVX), when available.

config CRYPTO_SHA1_ARM
	tristate "SHA1 digest algorithm (ARM-asm)"
	depends on ARM
	select CRYPTO_SHA1
	select CRYPTO_HASH
	help
	  SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
	  using optimized ARM assembler.

config CRYPTO_SHA256
	tristate "SHA224 and SHA256 digest algorithm"
	select CRYPTO_HASH
@@ -577,6 +590,46 @@ config CRYPTO_AES_NI_INTEL
	  ECB, CBC, LRW, PCBC, XTS. The 64 bit version has additional
	  acceleration for CTR.

config CRYPTO_AES_ARM
	tristate "AES cipher algorithms (ARM-asm)"
	depends on ARM
	select CRYPTO_ALGAPI
	select CRYPTO_AES
	help
	  Use optimized AES assembler routines for ARM platforms.

	  AES cipher algorithms (FIPS-197). AES uses the Rijndael
	  algorithm.

	  Rijndael appears to be consistently a very good performer in
	  both hardware and software across a wide range of computing
	  environments regardless of its use in feedback or non-feedback
	  modes. Its key setup time is excellent, and its key agility is
	  good. Rijndael's very low memory requirements make it very well
	  suited for restricted-space environments, in which it also
	  demonstrates excellent performance. Rijndael's operations are
	  among the easiest to defend against power and timing attacks.

	  The AES specifies three key sizes: 128, 192 and 256 bits.

	  See <http://csrc.nist.gov/encryption/aes/> for more information.

config CRYPTO_AES_ARM_BS
	tristate "Bit sliced AES using NEON instructions"
	depends on ARM && KERNEL_MODE_NEON
	select CRYPTO_ALGAPI
	select CRYPTO_AES_ARM
	select CRYPTO_ABLK_HELPER
	help
	  Use a faster and more secure NEON-based implementation of AES in CBC,
	  CTR and XTS modes.

	  Bit sliced AES gives around 45% speedup on Cortex-A15 for CTR mode
	  and for XTS mode encryption; CBC and XTS mode decryption speedup is
	  around 25%. (CBC encryption speed is not affected by this driver.)
	  This implementation does not rely on any lookup tables so it is
	  believed to be invulnerable to cache timing attacks.

config CRYPTO_ANUBIS
	tristate "Anubis cipher algorithm"
	select CRYPTO_ALGAPI

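For reference, a .config fragment that would enable everything this patch adds (option names are taken from the Kconfig entries above; building as modules rather than built-in is a local choice, and CRYPTO_ABLK_HELPER is pulled in automatically by the select in CRYPTO_AES_ARM_BS):

CONFIG_NEON=y
CONFIG_KERNEL_MODE_NEON=y
CONFIG_CRYPTO_SHA1_ARM=m
CONFIG_CRYPTO_AES_ARM=m
CONFIG_CRYPTO_AES_ARM_BS=m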
@@ -98,3 +98,4 @@ obj-$(CONFIG_CRYPTO_USER_API_RNG) += algif_rng.o
obj-$(CONFIG_XOR_BLOCKS) += xor.o
obj-$(CONFIG_ASYNC_CORE) += async_tx/
obj-$(CONFIG_ASYMMETRIC_KEY_TYPE) += asymmetric_keys/
obj-$(CONFIG_CRYPTO_ABLK_HELPER) += ablk_helper.o

crypto/ablk_helper.c (new file, 150 lines)
@@ -0,0 +1,150 @@
|
||||
/*
|
||||
* Shared async block cipher helpers
|
||||
*
|
||||
* Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
|
||||
*
|
||||
* Based on aesni-intel_glue.c by:
|
||||
* Copyright (C) 2008, Intel Corp.
|
||||
* Author: Huang Ying <ying.huang@intel.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
|
||||
* USA
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/crypto.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/hardirq.h>
|
||||
#include <crypto/algapi.h>
|
||||
#include <crypto/cryptd.h>
|
||||
#include <crypto/ablk_helper.h>
|
||||
#include <asm/simd.h>
|
||||
|
||||
int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
|
||||
unsigned int key_len)
|
||||
{
|
||||
struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
|
||||
struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base;
|
||||
int err;
|
||||
|
||||
crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
|
||||
crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm)
|
||||
& CRYPTO_TFM_REQ_MASK);
|
||||
err = crypto_ablkcipher_setkey(child, key, key_len);
|
||||
crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child)
|
||||
& CRYPTO_TFM_RES_MASK);
|
||||
return err;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(ablk_set_key);
|
||||
|
||||
int __ablk_encrypt(struct ablkcipher_request *req)
|
||||
{
|
||||
struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
|
||||
struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
|
||||
struct blkcipher_desc desc;
|
||||
|
||||
desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
|
||||
desc.info = req->info;
|
||||
desc.flags = 0;
|
||||
|
||||
return crypto_blkcipher_crt(desc.tfm)->encrypt(
|
||||
&desc, req->dst, req->src, req->nbytes);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__ablk_encrypt);
|
||||
|
||||
int ablk_encrypt(struct ablkcipher_request *req)
|
||||
{
|
||||
struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
|
||||
struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
|
||||
|
||||
if (!may_use_simd()) {
|
||||
struct ablkcipher_request *cryptd_req =
|
||||
ablkcipher_request_ctx(req);
|
||||
|
||||
*cryptd_req = *req;
|
||||
ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
|
||||
|
||||
return crypto_ablkcipher_encrypt(cryptd_req);
|
||||
} else {
|
||||
return __ablk_encrypt(req);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(ablk_encrypt);
|
||||
|
||||
int ablk_decrypt(struct ablkcipher_request *req)
|
||||
{
|
||||
struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
|
||||
struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
|
||||
|
||||
if (!may_use_simd()) {
|
||||
struct ablkcipher_request *cryptd_req =
|
||||
ablkcipher_request_ctx(req);
|
||||
|
||||
*cryptd_req = *req;
|
||||
ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
|
||||
|
||||
return crypto_ablkcipher_decrypt(cryptd_req);
|
||||
} else {
|
||||
struct blkcipher_desc desc;
|
||||
|
||||
desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
|
||||
desc.info = req->info;
|
||||
desc.flags = 0;
|
||||
|
||||
return crypto_blkcipher_crt(desc.tfm)->decrypt(
|
||||
&desc, req->dst, req->src, req->nbytes);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(ablk_decrypt);
|
||||
|
||||
void ablk_exit(struct crypto_tfm *tfm)
|
||||
{
|
||||
struct async_helper_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
|
||||
cryptd_free_ablkcipher(ctx->cryptd_tfm);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(ablk_exit);
|
||||
|
||||
int ablk_init_common(struct crypto_tfm *tfm, const char *drv_name)
|
||||
{
|
||||
struct async_helper_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
struct cryptd_ablkcipher *cryptd_tfm;
|
||||
|
||||
cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0);
|
||||
if (IS_ERR(cryptd_tfm))
|
||||
return PTR_ERR(cryptd_tfm);
|
||||
|
||||
ctx->cryptd_tfm = cryptd_tfm;
|
||||
tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) +
|
||||
crypto_ablkcipher_reqsize(&cryptd_tfm->base);
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(ablk_init_common);
|
||||
|
||||
int ablk_init(struct crypto_tfm *tfm)
|
||||
{
|
||||
char drv_name[CRYPTO_MAX_ALG_NAME];
|
||||
|
||||
snprintf(drv_name, sizeof(drv_name), "__driver-%s",
|
||||
crypto_tfm_alg_driver_name(tfm));
|
||||
|
||||
return ablk_init_common(tfm, drv_name);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(ablk_init);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
include/crypto/ablk_helper.h (new file, 31 lines)
@@ -0,0 +1,31 @@
/*
 * Shared async block cipher helpers
 */

#ifndef _CRYPTO_ABLK_HELPER_H
#define _CRYPTO_ABLK_HELPER_H

#include <linux/crypto.h>
#include <linux/kernel.h>
#include <crypto/cryptd.h>

struct async_helper_ctx {
	struct cryptd_ablkcipher *cryptd_tfm;
};

extern int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
			unsigned int key_len);

extern int __ablk_encrypt(struct ablkcipher_request *req);

extern int ablk_encrypt(struct ablkcipher_request *req);

extern int ablk_decrypt(struct ablkcipher_request *req);

extern void ablk_exit(struct crypto_tfm *tfm);

extern int ablk_init_common(struct crypto_tfm *tfm, const char *drv_name);

extern int ablk_init(struct crypto_tfm *tfm);

#endif /* _CRYPTO_ABLK_HELPER_H */