You've already forked linux-rockchip
mirror of
https://github.com/armbian/linux-rockchip.git
synced 2026-01-06 11:08:10 -08:00
Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto updates from Herbert Xu:
"API:
- Add library interfaces of certain crypto algorithms for WireGuard
- Remove the obsolete ablkcipher and blkcipher interfaces
- Move add_early_randomness() out of rng_mutex
Algorithms:
- Add blake2b shash algorithm
- Add blake2s shash algorithm
- Add curve25519 kpp algorithm
- Implement 4 way interleave in arm64/gcm-ce
- Implement ciphertext stealing in powerpc/spe-xts
- Add Eric Biggers's scalar accelerated ChaCha code for ARM
- Add accelerated 32r2 code from Zinc for MIPS
- Add OpenSSL/CRYPTOGRAMS poly1305 implementation for ARM and MIPS
Drivers:
- Fix entropy reading failures in ks-sa
- Add support for sam9x60 in atmel
- Add crypto accelerator for amlogic GXL
- Add sun8i-ce Crypto Engine
- Add sun8i-ss cryptographic offloader
- Add a host of algorithms to inside-secure
- Add NPCM RNG driver
- add HiSilicon HPRE accelerator
- Add HiSilicon TRNG driver"
* git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (285 commits)
crypto: vmx - Avoid weird build failures
crypto: lib/chacha20poly1305 - use chacha20_crypt()
crypto: x86/chacha - only unregister algorithms if registered
crypto: chacha_generic - remove unnecessary setkey() functions
crypto: amlogic - enable working on big endian kernel
crypto: sun8i-ce - enable working on big endian
crypto: mips/chacha - select CRYPTO_SKCIPHER, not CRYPTO_BLKCIPHER
hwrng: ks-sa - Enable COMPILE_TEST
crypto: essiv - remove redundant null pointer check before kfree
crypto: atmel-aes - Change data type for "lastc" buffer
crypto: atmel-tdes - Set the IV after {en,de}crypt
crypto: sun4i-ss - fix big endian issues
crypto: sun4i-ss - hide the Invalid keylen message
crypto: sun4i-ss - use crypto_ahash_digestsize
crypto: sun4i-ss - remove dependency on not 64BIT
crypto: sun4i-ss - Fix 64-bit size_t warnings on sun4i-ss-hash.c
MAINTAINERS: Add maintainer for HiSilicon SEC V2 driver
crypto: hisilicon - add DebugFS for HiSilicon SEC
Documentation: add DebugFS doc for HiSilicon SEC
crypto: hisilicon - add SRIOV for HiSilicon SEC
...
This commit is contained in:
57
Documentation/ABI/testing/debugfs-hisi-hpre
Normal file
57
Documentation/ABI/testing/debugfs-hisi-hpre
Normal file
@@ -0,0 +1,57 @@
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/cluster[0-3]/regs
|
||||
Date: Sep 2019
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump debug registers from the HPRE cluster.
|
||||
Only available for PF.
|
||||
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/cluster[0-3]/cluster_ctrl
|
||||
Date: Sep 2019
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Write the HPRE core selection in the cluster into this file,
|
||||
and then we can read the debug information of the core.
|
||||
Only available for PF.
|
||||
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/rdclr_en
|
||||
Date: Sep 2019
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: HPRE cores debug registers read clear control. 1 means enable
|
||||
register read clear, otherwise 0. Writing to this file has no
|
||||
functional effect, only enable or disable counters clear after
|
||||
reading of these registers.
|
||||
Only available for PF.
|
||||
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/current_qm
|
||||
Date: Sep 2019
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: One HPRE controller has one PF and multiple VFs, each function
|
||||
has a QM. Select the QM which the qm directory below refers to.
|
||||
Only available for PF.
|
||||
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/regs
|
||||
Date: Sep 2019
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump debug registers from the HPRE.
|
||||
Only available for PF.
|
||||
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/qm_regs
|
||||
Date: Sep 2019
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump debug registers from the QM.
|
||||
Available for PF and VF in host. VF in guest currently only
|
||||
has one debug register.
|
||||
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/current_q
|
||||
Date: Sep 2019
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: One QM may contain multiple queues. Select specific queue to
|
||||
show its debug registers in above qm_regs.
|
||||
Only available for PF.
|
||||
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/clear_enable
|
||||
Date: Sep 2019
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: QM debug registers(qm_regs) read clear control. 1 means enable
|
||||
register read clear, otherwise 0.
|
||||
Writing to this file has no functional effect, only enable or
|
||||
disable counters clear after reading of these registers.
|
||||
Only available for PF.
|
||||
43
Documentation/ABI/testing/debugfs-hisi-sec
Normal file
43
Documentation/ABI/testing/debugfs-hisi-sec
Normal file
@@ -0,0 +1,43 @@
|
||||
What: /sys/kernel/debug/hisi_sec/<bdf>/sec_dfx
|
||||
Date: Oct 2019
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the debug registers of SEC cores.
|
||||
Only available for PF.
|
||||
|
||||
What: /sys/kernel/debug/hisi_sec/<bdf>/clear_enable
|
||||
Date: Oct 2019
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Enabling/disabling of clear action after reading
|
||||
the SEC debug registers.
|
||||
0: disable, 1: enable.
|
||||
Only available for PF, and has no other effect on SEC.
|
||||
|
||||
What: /sys/kernel/debug/hisi_sec/<bdf>/current_qm
|
||||
Date: Oct 2019
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: One SEC controller has one PF and multiple VFs, each function
|
||||
has a QM. This file can be used to select the QM which below
|
||||
qm refers to.
|
||||
Only available for PF.
|
||||
|
||||
What: /sys/kernel/debug/hisi_sec/<bdf>/qm/qm_regs
|
||||
Date: Oct 2019
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump of QM related debug registers.
|
||||
Available for PF and VF in host. VF in guest currently only
|
||||
has one debug register.
|
||||
|
||||
What: /sys/kernel/debug/hisi_sec/<bdf>/qm/current_q
|
||||
Date: Oct 2019
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: One QM of SEC may contain multiple queues. Select specific
|
||||
queue to show its debug registers in above 'qm_regs'.
|
||||
Only available for PF.
|
||||
|
||||
What: /sys/kernel/debug/hisi_sec/<bdf>/qm/clear_enable
|
||||
Date: Oct 2019
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Enabling/disabling of clear action after reading
|
||||
the SEC's QM debug registers.
|
||||
0: disable, 1: enable.
|
||||
Only available for PF, and has no other effect on SEC.
|
||||
@@ -5,7 +5,7 @@ Block Cipher Algorithm Definitions
|
||||
:doc: Block Cipher Algorithm Definitions
|
||||
|
||||
.. kernel-doc:: include/linux/crypto.h
|
||||
:functions: crypto_alg ablkcipher_alg blkcipher_alg cipher_alg compress_alg
|
||||
:functions: crypto_alg cipher_alg compress_alg
|
||||
|
||||
Symmetric Key Cipher API
|
||||
------------------------
|
||||
@@ -33,30 +33,3 @@ Single Block Cipher API
|
||||
|
||||
.. kernel-doc:: include/linux/crypto.h
|
||||
:functions: crypto_alloc_cipher crypto_free_cipher crypto_has_cipher crypto_cipher_blocksize crypto_cipher_setkey crypto_cipher_encrypt_one crypto_cipher_decrypt_one
|
||||
|
||||
Asynchronous Block Cipher API - Deprecated
|
||||
------------------------------------------
|
||||
|
||||
.. kernel-doc:: include/linux/crypto.h
|
||||
:doc: Asynchronous Block Cipher API
|
||||
|
||||
.. kernel-doc:: include/linux/crypto.h
|
||||
:functions: crypto_free_ablkcipher crypto_has_ablkcipher crypto_ablkcipher_ivsize crypto_ablkcipher_blocksize crypto_ablkcipher_setkey crypto_ablkcipher_reqtfm crypto_ablkcipher_encrypt crypto_ablkcipher_decrypt
|
||||
|
||||
Asynchronous Cipher Request Handle - Deprecated
|
||||
-----------------------------------------------
|
||||
|
||||
.. kernel-doc:: include/linux/crypto.h
|
||||
:doc: Asynchronous Cipher Request Handle
|
||||
|
||||
.. kernel-doc:: include/linux/crypto.h
|
||||
:functions: crypto_ablkcipher_reqsize ablkcipher_request_set_tfm ablkcipher_request_alloc ablkcipher_request_free ablkcipher_request_set_callback ablkcipher_request_set_crypt
|
||||
|
||||
Synchronous Block Cipher API - Deprecated
|
||||
-----------------------------------------
|
||||
|
||||
.. kernel-doc:: include/linux/crypto.h
|
||||
:doc: Synchronous Block Cipher API
|
||||
|
||||
.. kernel-doc:: include/linux/crypto.h
|
||||
:functions: crypto_alloc_blkcipher crypto_free_blkcipher crypto_has_blkcipher crypto_blkcipher_name crypto_blkcipher_ivsize crypto_blkcipher_blocksize crypto_blkcipher_setkey crypto_blkcipher_encrypt crypto_blkcipher_encrypt_iv crypto_blkcipher_decrypt crypto_blkcipher_decrypt_iv crypto_blkcipher_set_iv crypto_blkcipher_get_iv
|
||||
|
||||
@@ -201,10 +201,6 @@ the aforementioned cipher types:
|
||||
- CRYPTO_ALG_TYPE_AEAD Authenticated Encryption with Associated Data
|
||||
(MAC)
|
||||
|
||||
- CRYPTO_ALG_TYPE_BLKCIPHER Synchronous multi-block cipher
|
||||
|
||||
- CRYPTO_ALG_TYPE_ABLKCIPHER Asynchronous multi-block cipher
|
||||
|
||||
- CRYPTO_ALG_TYPE_KPP Key-agreement Protocol Primitive (KPP) such as
|
||||
an ECDH or DH implementation
|
||||
|
||||
|
||||
@@ -63,8 +63,6 @@ request by using:
|
||||
When your driver receives a crypto_request, you must transfer it to
|
||||
the crypto engine via one of:
|
||||
|
||||
* crypto_transfer_ablkcipher_request_to_engine()
|
||||
|
||||
* crypto_transfer_aead_request_to_engine()
|
||||
|
||||
* crypto_transfer_akcipher_request_to_engine()
|
||||
@@ -75,8 +73,6 @@ the crypto engine via one of:
|
||||
|
||||
At the end of the request process, a call to one of the following functions is needed:
|
||||
|
||||
* crypto_finalize_ablkcipher_request()
|
||||
|
||||
* crypto_finalize_aead_request()
|
||||
|
||||
* crypto_finalize_akcipher_request()
|
||||
|
||||
@@ -128,25 +128,20 @@ process requests that are unaligned. This implies, however, additional
|
||||
overhead as the kernel crypto API needs to perform the realignment of
|
||||
the data which may imply moving of data.
|
||||
|
||||
Cipher Definition With struct blkcipher_alg and ablkcipher_alg
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
Cipher Definition With struct skcipher_alg
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Struct blkcipher_alg defines a synchronous block cipher whereas struct
|
||||
ablkcipher_alg defines an asynchronous block cipher.
|
||||
Struct skcipher_alg defines a multi-block cipher, or more generally, a
|
||||
length-preserving symmetric cipher algorithm.
|
||||
|
||||
Please refer to the single block cipher description for schematics of
|
||||
the block cipher usage.
|
||||
Scatterlist handling
|
||||
~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Specifics Of Asynchronous Multi-Block Cipher
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
There are a couple of specifics to the asynchronous interface.
|
||||
|
||||
First of all, some of the drivers will want to use the Generic
|
||||
ScatterWalk in case the hardware needs to be fed separate chunks of the
|
||||
scatterlist which contains the plaintext and will contain the
|
||||
ciphertext. Please refer to the ScatterWalk interface offered by the
|
||||
Linux kernel scatter / gather list implementation.
|
||||
Some drivers will want to use the Generic ScatterWalk in case the
|
||||
hardware needs to be fed separate chunks of the scatterlist which
|
||||
contains the plaintext and will contain the ciphertext. Please refer
|
||||
to the ScatterWalk interface offered by the Linux kernel scatter /
|
||||
gather list implementation.
|
||||
|
||||
Hashing [HASH]
|
||||
--------------
|
||||
|
||||
@@ -0,0 +1,60 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: http://devicetree.org/schemas/crypto/allwinner,sun8i-ss.yaml#
|
||||
$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
|
||||
title: Allwinner Security System v2 driver
|
||||
|
||||
maintainers:
|
||||
- Corentin Labbe <corentin.labbe@gmail.com>
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
enum:
|
||||
- allwinner,sun8i-a83t-crypto
|
||||
- allwinner,sun9i-a80-crypto
|
||||
|
||||
reg:
|
||||
maxItems: 1
|
||||
|
||||
interrupts:
|
||||
maxItems: 1
|
||||
|
||||
clocks:
|
||||
items:
|
||||
- description: Bus clock
|
||||
- description: Module clock
|
||||
|
||||
clock-names:
|
||||
items:
|
||||
- const: bus
|
||||
- const: mod
|
||||
|
||||
resets:
|
||||
maxItems: 1
|
||||
|
||||
required:
|
||||
- compatible
|
||||
- reg
|
||||
- interrupts
|
||||
- clocks
|
||||
- clock-names
|
||||
- resets
|
||||
|
||||
additionalProperties: false
|
||||
|
||||
examples:
|
||||
- |
|
||||
#include <dt-bindings/interrupt-controller/arm-gic.h>
|
||||
#include <dt-bindings/clock/sun8i-a83t-ccu.h>
|
||||
#include <dt-bindings/reset/sun8i-a83t-ccu.h>
|
||||
|
||||
crypto: crypto@1c15000 {
|
||||
compatible = "allwinner,sun8i-a83t-crypto";
|
||||
reg = <0x01c15000 0x1000>;
|
||||
interrupts = <GIC_SPI 94 IRQ_TYPE_LEVEL_HIGH>;
|
||||
resets = <&ccu RST_BUS_SS>;
|
||||
clocks = <&ccu CLK_BUS_SS>, <&ccu CLK_SS>;
|
||||
clock-names = "bus", "mod";
|
||||
};
|
||||
@@ -0,0 +1,52 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: http://devicetree.org/schemas/crypto/amlogic,gxl-crypto.yaml#
|
||||
$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
|
||||
title: Amlogic GXL Cryptographic Offloader
|
||||
|
||||
maintainers:
|
||||
- Corentin Labbe <clabbe@baylibre.com>
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
items:
|
||||
- const: amlogic,gxl-crypto
|
||||
|
||||
reg:
|
||||
maxItems: 1
|
||||
|
||||
interrupts:
|
||||
items:
|
||||
- description: "Interrupt for flow 0"
|
||||
- description: "Interrupt for flow 1"
|
||||
|
||||
clocks:
|
||||
maxItems: 1
|
||||
|
||||
clock-names:
|
||||
const: blkmv
|
||||
|
||||
required:
|
||||
- compatible
|
||||
- reg
|
||||
- interrupts
|
||||
- clocks
|
||||
- clock-names
|
||||
|
||||
additionalProperties: false
|
||||
|
||||
examples:
|
||||
- |
|
||||
#include <dt-bindings/interrupt-controller/irq.h>
|
||||
#include <dt-bindings/interrupt-controller/arm-gic.h>
|
||||
#include <dt-bindings/clock/gxbb-clkc.h>
|
||||
|
||||
crypto: crypto-engine@c883e000 {
|
||||
compatible = "amlogic,gxl-crypto";
|
||||
reg = <0x0 0xc883e000 0x0 0x36>;
|
||||
interrupts = <GIC_SPI 188 IRQ_TYPE_EDGE_RISING>, <GIC_SPI 189 IRQ_TYPE_EDGE_RISING>;
|
||||
clocks = <&clkc CLKID_BLKMV>;
|
||||
clock-names = "blkmv";
|
||||
};
|
||||
@@ -1,7 +1,7 @@
|
||||
Atmel TRNG (True Random Number Generator) block
|
||||
|
||||
Required properties:
|
||||
- compatible : Should be "atmel,at91sam9g45-trng"
|
||||
- compatible : Should be "atmel,at91sam9g45-trng" or "microchip,sam9x60-trng"
|
||||
- reg : Offset and length of the register set of this block
|
||||
- interrupts : the interrupt number for the TRNG block
|
||||
- clocks: should contain the TRNG clk source
|
||||
|
||||
12
Documentation/devicetree/bindings/rng/nuvoton,npcm-rng.txt
Normal file
12
Documentation/devicetree/bindings/rng/nuvoton,npcm-rng.txt
Normal file
@@ -0,0 +1,12 @@
|
||||
NPCM SoC Random Number Generator
|
||||
|
||||
Required properties:
|
||||
- compatible : "nuvoton,npcm750-rng" for the NPCM7XX BMC.
|
||||
- reg : Specifies physical base address and size of the registers.
|
||||
|
||||
Example:
|
||||
|
||||
rng: rng@f000b000 {
|
||||
compatible = "nuvoton,npcm750-rng";
|
||||
reg = <0xf000b000 0x8>;
|
||||
};
|
||||
27
Documentation/devicetree/bindings/rng/omap3_rom_rng.txt
Normal file
27
Documentation/devicetree/bindings/rng/omap3_rom_rng.txt
Normal file
@@ -0,0 +1,27 @@
|
||||
OMAP ROM RNG driver binding
|
||||
|
||||
Secure SoCs may provide RNG via secure ROM calls like Nokia N900 does. The
|
||||
implementation can depend on the SoC secure ROM used.
|
||||
|
||||
- compatible:
|
||||
Usage: required
|
||||
Value type: <string>
|
||||
Definition: must be "nokia,n900-rom-rng"
|
||||
|
||||
- clocks:
|
||||
Usage: required
|
||||
Value type: <prop-encoded-array>
|
||||
Definition: reference to the RNG interface clock
|
||||
|
||||
- clock-names:
|
||||
Usage: required
|
||||
Value type: <stringlist>
|
||||
Definition: must be "ick"
|
||||
|
||||
Example:
|
||||
|
||||
rom_rng: rng {
|
||||
compatible = "nokia,n900-rom-rng";
|
||||
clocks = <&rng_ick>;
|
||||
clock-names = "ick";
|
||||
};
|
||||
@@ -0,0 +1,17 @@
|
||||
Exynos True Random Number Generator
|
||||
|
||||
Required properties:
|
||||
|
||||
- compatible : Should be "samsung,exynos5250-trng".
|
||||
- reg : Specifies base physical address and size of the registers map.
|
||||
- clocks : Phandle to clock-controller plus clock-specifier pair.
|
||||
- clock-names : "secss" as a clock name.
|
||||
|
||||
Example:
|
||||
|
||||
rng@10830600 {
|
||||
compatible = "samsung,exynos5250-trng";
|
||||
reg = <0x10830600 0x100>;
|
||||
clocks = <&clock CLK_SSS>;
|
||||
clock-names = "secss";
|
||||
};
|
||||
37
MAINTAINERS
37
MAINTAINERS
@@ -682,11 +682,11 @@ S: Maintained
|
||||
F: Documentation/devicetree/bindings/opp/sun50i-nvmem-cpufreq.txt
|
||||
F: drivers/cpufreq/sun50i-cpufreq-nvmem.c
|
||||
|
||||
ALLWINNER SECURITY SYSTEM
|
||||
ALLWINNER CRYPTO DRIVERS
|
||||
M: Corentin Labbe <clabbe.montjoie@gmail.com>
|
||||
L: linux-crypto@vger.kernel.org
|
||||
S: Maintained
|
||||
F: drivers/crypto/sunxi-ss/
|
||||
F: drivers/crypto/allwinner/
|
||||
|
||||
ALLWINNER VPU DRIVER
|
||||
M: Maxime Ripard <mripard@kernel.org>
|
||||
@@ -1470,6 +1470,14 @@ F: drivers/soc/amlogic/
|
||||
F: drivers/rtc/rtc-meson*
|
||||
N: meson
|
||||
|
||||
ARM/Amlogic Meson SoC Crypto Drivers
|
||||
M: Corentin Labbe <clabbe@baylibre.com>
|
||||
L: linux-crypto@vger.kernel.org
|
||||
L: linux-amlogic@lists.infradead.org
|
||||
S: Maintained
|
||||
F: drivers/crypto/amlogic/
|
||||
F: Documentation/devicetree/bindings/crypto/amlogic*
|
||||
|
||||
ARM/Amlogic Meson SoC Sound Drivers
|
||||
M: Jerome Brunet <jbrunet@baylibre.com>
|
||||
L: alsa-devel@alsa-project.org (moderated for non-subscribers)
|
||||
@@ -7372,6 +7380,25 @@ F: include/uapi/linux/if_hippi.h
|
||||
F: net/802/hippi.c
|
||||
F: drivers/net/hippi/
|
||||
|
||||
HISILICON SECURITY ENGINE V2 DRIVER (SEC2)
|
||||
M: Zaibo Xu <xuzaibo@huawei.com>
|
||||
L: linux-crypto@vger.kernel.org
|
||||
S: Maintained
|
||||
F: drivers/crypto/hisilicon/sec2/sec_crypto.c
|
||||
F: drivers/crypto/hisilicon/sec2/sec_main.c
|
||||
F: drivers/crypto/hisilicon/sec2/sec_crypto.h
|
||||
F: drivers/crypto/hisilicon/sec2/sec.h
|
||||
F: Documentation/ABI/testing/debugfs-hisi-sec
|
||||
|
||||
HISILICON HIGH PERFORMANCE RSA ENGINE DRIVER (HPRE)
|
||||
M: Zaibo Xu <xuzaibo@huawei.com>
|
||||
L: linux-crypto@vger.kernel.org
|
||||
S: Maintained
|
||||
F: drivers/crypto/hisilicon/hpre/hpre_crypto.c
|
||||
F: drivers/crypto/hisilicon/hpre/hpre_main.c
|
||||
F: drivers/crypto/hisilicon/hpre/hpre.h
|
||||
F: Documentation/ABI/testing/debugfs-hisi-hpre
|
||||
|
||||
HISILICON NETWORK SUBSYSTEM 3 DRIVER (HNS3)
|
||||
M: Yisen Zhuang <yisen.zhuang@huawei.com>
|
||||
M: Salil Mehta <salil.mehta@huawei.com>
|
||||
@@ -7380,6 +7407,11 @@ W: http://www.hisilicon.com
|
||||
S: Maintained
|
||||
F: drivers/net/ethernet/hisilicon/hns3/
|
||||
|
||||
HISILICON TRUE RANDOM NUMBER GENERATOR V2 SUPPORT
|
||||
M: Zaibo Xu <xuzaibo@huawei.com>
|
||||
S: Maintained
|
||||
F: drivers/char/hw_random/hisi-trng-v2.c
|
||||
|
||||
HISILICON LPC BUS DRIVER
|
||||
M: john.garry@huawei.com
|
||||
W: http://www.hisilicon.com
|
||||
@@ -7425,7 +7457,6 @@ S: Maintained
|
||||
F: drivers/crypto/hisilicon/qm.c
|
||||
F: drivers/crypto/hisilicon/qm.h
|
||||
F: drivers/crypto/hisilicon/sgl.c
|
||||
F: drivers/crypto/hisilicon/sgl.h
|
||||
F: drivers/crypto/hisilicon/zip/
|
||||
F: Documentation/ABI/testing/debugfs-hisi-zip
|
||||
|
||||
|
||||
@@ -155,6 +155,12 @@
|
||||
pwms = <&pwm9 0 26316 0>; /* 38000 Hz */
|
||||
};
|
||||
|
||||
rom_rng: rng {
|
||||
compatible = "nokia,n900-rom-rng";
|
||||
clocks = <&rng_ick>;
|
||||
clock-names = "ick";
|
||||
};
|
||||
|
||||
/* controlled (enabled/disabled) directly by bcm2048 and wl1251 */
|
||||
vctcxo: vctcxo {
|
||||
compatible = "fixed-clock";
|
||||
|
||||
@@ -30,7 +30,7 @@ config CRYPTO_SHA1_ARM_NEON
|
||||
|
||||
config CRYPTO_SHA1_ARM_CE
|
||||
tristate "SHA1 digest algorithm (ARM v8 Crypto Extensions)"
|
||||
depends on KERNEL_MODE_NEON
|
||||
depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800)
|
||||
select CRYPTO_SHA1_ARM
|
||||
select CRYPTO_HASH
|
||||
help
|
||||
@@ -39,7 +39,7 @@ config CRYPTO_SHA1_ARM_CE
|
||||
|
||||
config CRYPTO_SHA2_ARM_CE
|
||||
tristate "SHA-224/256 digest algorithm (ARM v8 Crypto Extensions)"
|
||||
depends on KERNEL_MODE_NEON
|
||||
depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800)
|
||||
select CRYPTO_SHA256_ARM
|
||||
select CRYPTO_HASH
|
||||
help
|
||||
@@ -81,7 +81,7 @@ config CRYPTO_AES_ARM
|
||||
config CRYPTO_AES_ARM_BS
|
||||
tristate "Bit sliced AES using NEON instructions"
|
||||
depends on KERNEL_MODE_NEON
|
||||
select CRYPTO_BLKCIPHER
|
||||
select CRYPTO_SKCIPHER
|
||||
select CRYPTO_LIB_AES
|
||||
select CRYPTO_SIMD
|
||||
help
|
||||
@@ -96,8 +96,8 @@ config CRYPTO_AES_ARM_BS
|
||||
|
||||
config CRYPTO_AES_ARM_CE
|
||||
tristate "Accelerated AES using ARMv8 Crypto Extensions"
|
||||
depends on KERNEL_MODE_NEON
|
||||
select CRYPTO_BLKCIPHER
|
||||
depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800)
|
||||
select CRYPTO_SKCIPHER
|
||||
select CRYPTO_LIB_AES
|
||||
select CRYPTO_SIMD
|
||||
help
|
||||
@@ -106,7 +106,7 @@ config CRYPTO_AES_ARM_CE
|
||||
|
||||
config CRYPTO_GHASH_ARM_CE
|
||||
tristate "PMULL-accelerated GHASH using NEON/ARMv8 Crypto Extensions"
|
||||
depends on KERNEL_MODE_NEON
|
||||
depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800)
|
||||
select CRYPTO_HASH
|
||||
select CRYPTO_CRYPTD
|
||||
select CRYPTO_GF128MUL
|
||||
@@ -118,23 +118,35 @@ config CRYPTO_GHASH_ARM_CE
|
||||
|
||||
config CRYPTO_CRCT10DIF_ARM_CE
|
||||
tristate "CRCT10DIF digest algorithm using PMULL instructions"
|
||||
depends on KERNEL_MODE_NEON && CRC_T10DIF
|
||||
depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800)
|
||||
depends on CRC_T10DIF
|
||||
select CRYPTO_HASH
|
||||
|
||||
config CRYPTO_CRC32_ARM_CE
|
||||
tristate "CRC32(C) digest algorithm using CRC and/or PMULL instructions"
|
||||
depends on KERNEL_MODE_NEON && CRC32
|
||||
depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800)
|
||||
depends on CRC32
|
||||
select CRYPTO_HASH
|
||||
|
||||
config CRYPTO_CHACHA20_NEON
|
||||
tristate "NEON accelerated ChaCha stream cipher algorithms"
|
||||
depends on KERNEL_MODE_NEON
|
||||
select CRYPTO_BLKCIPHER
|
||||
select CRYPTO_CHACHA20
|
||||
tristate "NEON and scalar accelerated ChaCha stream cipher algorithms"
|
||||
select CRYPTO_SKCIPHER
|
||||
select CRYPTO_ARCH_HAVE_LIB_CHACHA
|
||||
|
||||
config CRYPTO_POLY1305_ARM
|
||||
tristate "Accelerated scalar and SIMD Poly1305 hash implementations"
|
||||
select CRYPTO_HASH
|
||||
select CRYPTO_ARCH_HAVE_LIB_POLY1305
|
||||
|
||||
config CRYPTO_NHPOLY1305_NEON
|
||||
tristate "NEON accelerated NHPoly1305 hash function (for Adiantum)"
|
||||
depends on KERNEL_MODE_NEON
|
||||
select CRYPTO_NHPOLY1305
|
||||
|
||||
config CRYPTO_CURVE25519_NEON
|
||||
tristate "NEON accelerated Curve25519 scalar multiplication library"
|
||||
depends on KERNEL_MODE_NEON
|
||||
select CRYPTO_LIB_CURVE25519_GENERIC
|
||||
select CRYPTO_ARCH_HAVE_LIB_CURVE25519
|
||||
|
||||
endif
|
||||
|
||||
@@ -10,34 +10,16 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
|
||||
obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
|
||||
obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o
|
||||
obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o
|
||||
obj-$(CONFIG_CRYPTO_POLY1305_ARM) += poly1305-arm.o
|
||||
obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o
|
||||
obj-$(CONFIG_CRYPTO_CURVE25519_NEON) += curve25519-neon.o
|
||||
|
||||
ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
|
||||
ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o
|
||||
ce-obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o
|
||||
ce-obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o
|
||||
ce-obj-$(CONFIG_CRYPTO_CRCT10DIF_ARM_CE) += crct10dif-arm-ce.o
|
||||
crc-obj-$(CONFIG_CRYPTO_CRC32_ARM_CE) += crc32-arm-ce.o
|
||||
|
||||
ifneq ($(crc-obj-y)$(crc-obj-m),)
|
||||
ifeq ($(call as-instr,.arch armv8-a\n.arch_extension crc,y,n),y)
|
||||
ce-obj-y += $(crc-obj-y)
|
||||
ce-obj-m += $(crc-obj-m)
|
||||
else
|
||||
$(warning These CRC Extensions modules need binutils 2.23 or higher)
|
||||
$(warning $(crc-obj-y) $(crc-obj-m))
|
||||
endif
|
||||
endif
|
||||
|
||||
ifneq ($(ce-obj-y)$(ce-obj-m),)
|
||||
ifeq ($(call as-instr,.fpu crypto-neon-fp-armv8,y,n),y)
|
||||
obj-y += $(ce-obj-y)
|
||||
obj-m += $(ce-obj-m)
|
||||
else
|
||||
$(warning These ARMv8 Crypto Extensions modules need binutils 2.23 or higher)
|
||||
$(warning $(ce-obj-y) $(ce-obj-m))
|
||||
endif
|
||||
endif
|
||||
obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
|
||||
obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o
|
||||
obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o
|
||||
obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o
|
||||
obj-$(CONFIG_CRYPTO_CRCT10DIF_ARM_CE) += crct10dif-arm-ce.o
|
||||
obj-$(CONFIG_CRYPTO_CRC32_ARM_CE) += crc32-arm-ce.o
|
||||
|
||||
aes-arm-y := aes-cipher-core.o aes-cipher-glue.o
|
||||
aes-arm-bs-y := aes-neonbs-core.o aes-neonbs-glue.o
|
||||
@@ -53,13 +35,19 @@ aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o
|
||||
ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o
|
||||
crct10dif-arm-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o
|
||||
crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o
|
||||
chacha-neon-y := chacha-neon-core.o chacha-neon-glue.o
|
||||
chacha-neon-y := chacha-scalar-core.o chacha-glue.o
|
||||
chacha-neon-$(CONFIG_KERNEL_MODE_NEON) += chacha-neon-core.o
|
||||
poly1305-arm-y := poly1305-core.o poly1305-glue.o
|
||||
nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o
|
||||
curve25519-neon-y := curve25519-core.o curve25519-glue.o
|
||||
|
||||
ifdef REGENERATE_ARM_CRYPTO
|
||||
quiet_cmd_perl = PERL $@
|
||||
cmd_perl = $(PERL) $(<) > $(@)
|
||||
|
||||
$(src)/poly1305-core.S_shipped: $(src)/poly1305-armv4.pl
|
||||
$(call cmd,perl)
|
||||
|
||||
$(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl
|
||||
$(call cmd,perl)
|
||||
|
||||
@@ -67,4 +55,9 @@ $(src)/sha512-core.S_shipped: $(src)/sha512-armv4.pl
|
||||
$(call cmd,perl)
|
||||
endif
|
||||
|
||||
clean-files += sha256-core.S sha512-core.S
|
||||
clean-files += poly1305-core.S sha256-core.S sha512-core.S
|
||||
|
||||
# massage the perlasm code a bit so we only get the NEON routine if we need it
|
||||
poly1305-aflags-$(CONFIG_CPU_V7) := -U__LINUX_ARM_ARCH__ -D__LINUX_ARM_ARCH__=5
|
||||
poly1305-aflags-$(CONFIG_KERNEL_MODE_NEON) := -U__LINUX_ARM_ARCH__ -D__LINUX_ARM_ARCH__=7
|
||||
AFLAGS_poly1305-core.o += $(poly1305-aflags-y)
|
||||
|
||||
343
arch/arm/crypto/chacha-glue.c
Normal file
343
arch/arm/crypto/chacha-glue.c
Normal file
@@ -0,0 +1,343 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* ARM scalar and NEON accelerated ChaCha and XChaCha stream ciphers,
|
||||
* including ChaCha20 (RFC7539)
|
||||
*
|
||||
* Copyright (C) 2016-2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
|
||||
* Copyright (C) 2015 Martin Willi
|
||||
*/
|
||||
|
||||
#include <crypto/algapi.h>
|
||||
#include <crypto/internal/chacha.h>
|
||||
#include <crypto/internal/simd.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <linux/jump_label.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
#include <asm/cputype.h>
|
||||
#include <asm/hwcap.h>
|
||||
#include <asm/neon.h>
|
||||
#include <asm/simd.h>
|
||||
|
||||
asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
|
||||
int nrounds);
|
||||
asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
|
||||
int nrounds);
|
||||
asmlinkage void hchacha_block_arm(const u32 *state, u32 *out, int nrounds);
|
||||
asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
|
||||
|
||||
asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
|
||||
const u32 *state, int nrounds);
|
||||
|
||||
static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_neon);
|
||||
|
||||
static inline bool neon_usable(void)
|
||||
{
|
||||
return static_branch_likely(&use_neon) && crypto_simd_usable();
|
||||
}
|
||||
|
||||
/*
 * XOR @bytes bytes from @src into @dst using the NEON block routines.
 *
 * @state:   16-word ChaCha state; word 12 is advanced by one for every
 *           full block that is processed
 * @dst:     output buffer
 * @src:     input buffer
 * @bytes:   number of bytes to process
 * @nrounds: round count handed through to the assembly routines
 *
 * The callers in this file bracket this function with
 * kernel_neon_begin()/kernel_neon_end().
 */
static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
			  unsigned int bytes, int nrounds)
{
	const unsigned int quad = 4 * CHACHA_BLOCK_SIZE;
	u8 tail[CHACHA_BLOCK_SIZE];

	/* Bulk of the data: four blocks per call. */
	for (; bytes >= quad; bytes -= quad) {
		chacha_4block_xor_neon(state, dst, src, nrounds);
		src += quad;
		dst += quad;
		state[12] += 4;
	}

	/* Up to three remaining full blocks, one at a time. */
	for (; bytes >= CHACHA_BLOCK_SIZE; bytes -= CHACHA_BLOCK_SIZE) {
		chacha_block_xor_neon(state, dst, src, nrounds);
		src += CHACHA_BLOCK_SIZE;
		dst += CHACHA_BLOCK_SIZE;
		state[12]++;
	}

	if (!bytes)
		return;

	/*
	 * Partial trailing block: the block routine is handed a full-size
	 * stack buffer so that it never touches memory past the end of
	 * @src or @dst; only @bytes bytes are copied in and out.
	 * state[12] is deliberately left untouched here, as in the
	 * full-block loops' caller-visible contract above.
	 */
	memcpy(tail, src, bytes);
	chacha_block_xor_neon(state, tail, tail, nrounds);
	memcpy(dst, tail, bytes);
}
|
||||
|
||||
/*
 * Run the HChaCha block function on @state and store the result in @stream.
 *
 * The NEON routine is used only when CONFIG_KERNEL_MODE_NEON is enabled and
 * neon_usable() returns true; in every other case the scalar ARM routine
 * does the work.
 */
void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
{
	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && neon_usable()) {
		kernel_neon_begin();
		hchacha_block_neon(state, stream, nrounds);
		kernel_neon_end();
		return;
	}

	hchacha_block_arm(state, stream, nrounds);
}
|
||||
EXPORT_SYMBOL(hchacha_block_arch);
|
||||
|
||||
/*
 * Architecture entry point for ChaCha state setup.
 *
 * There is no ARM-specific initialisation; the call is forwarded unchanged
 * to chacha_init_generic().
 */
void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
{
	chacha_init_generic(state, key, iv);
}
|
||||
EXPORT_SYMBOL(chacha_init_arch);
|
||||
|
||||
/*
 * XOR @bytes bytes of @src into @dst with the ChaCha keystream derived
 * from @state.
 *
 * The NEON path is taken only when CONFIG_KERNEL_MODE_NEON is enabled,
 * neon_usable() returns true and the request is longer than one block.
 * Otherwise the scalar routine runs and state[12] is advanced here by the
 * number of blocks consumed, rounded up.
 */
void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
		       int nrounds)
{
	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && neon_usable() &&
	    bytes > CHACHA_BLOCK_SIZE) {
		kernel_neon_begin();
		chacha_doneon(state, dst, src, bytes, nrounds);
		kernel_neon_end();
		return;
	}

	/* Scalar fallback; the counter word is updated by the caller side. */
	chacha_doarm(dst, src, bytes, state, nrounds);
	state[12] += DIV_ROUND_UP(bytes, CHACHA_BLOCK_SIZE);
}
|
||||
EXPORT_SYMBOL(chacha_crypt_arch);
|
||||
|
||||
static int chacha_stream_xor(struct skcipher_request *req,
|
||||
const struct chacha_ctx *ctx, const u8 *iv,
|
||||
bool neon)
|
||||
{
|
||||
struct skcipher_walk walk;
|
||||
u32 state[16];
|
||||
int err;
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, false);
|
||||
|
||||
chacha_init_generic(state, ctx->key, iv);
|
||||
|
||||
while (walk.nbytes > 0) {
|
||||
unsigned int nbytes = walk.nbytes;
|
||||
|
||||
if (nbytes < walk.total)
|
||||
nbytes = round_down(nbytes, walk.stride);
|
||||
|
||||
if (!neon) {
|
||||
chacha_doarm(walk.dst.virt.addr, walk.src.virt.addr,
|
||||
nbytes, state, ctx->nrounds);
|
||||
state[12] += DIV_ROUND_UP(nbytes, CHACHA_BLOCK_SIZE);
|
||||
} else {
|
||||
kernel_neon_begin();
|
||||
chacha_doneon(state, walk.dst.virt.addr,
|
||||
walk.src.virt.addr, nbytes, ctx->nrounds);
|
||||
kernel_neon_end();
|
||||
}
|
||||
err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
 * Common ChaCha skcipher handler: fetch the transform context that belongs to
 * @req and stream the request through chacha_stream_xor() with the request's
 * own IV.  @neon selects the NEON (true) or scalar (false) code.
 */
static int do_chacha(struct skcipher_request *req, bool neon)
{
	struct chacha_ctx *key_ctx =
		crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));

	return chacha_stream_xor(req, key_ctx, req->iv, neon);
}
|
||||
|
||||
static int chacha_arm(struct skcipher_request *req)
|
||||
{
|
||||
return do_chacha(req, false);
|
||||
}
|
||||
|
||||
static int chacha_neon(struct skcipher_request *req)
|
||||
{
|
||||
return do_chacha(req, neon_usable());
|
||||
}
|
||||
|
||||
/*
 * do_xchacha - common XChaCha skcipher handler.
 *
 * @req:  request to process; req->iv is read up to byte 31
 * @neon: true to use the NEON code, false to use the scalar code
 *
 * Runs one HChaCha block over the state built from the transform key and
 * req->iv to obtain a subkey, then streams the request through
 * chacha_stream_xor() with that subkey and an IV assembled from bytes 16..31
 * of req->iv.
 *
 * Returns the value returned by chacha_stream_xor().
 */
static int do_xchacha(struct skcipher_request *req, bool neon)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct chacha_ctx subctx;
	u32 state[16];
	u8 real_iv[16];

	chacha_init_generic(state, ctx->key, req->iv);

	/*
	 * Test the config option here as chacha_crypt_arch() does, so that
	 * the NEON call is compile-time dead code when kernel-mode NEON is
	 * not configured.
	 */
	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon) {
		hchacha_block_arm(state, subctx.key, ctx->nrounds);
	} else {
		kernel_neon_begin();
		hchacha_block_neon(state, subctx.key, ctx->nrounds);
		kernel_neon_end();
	}
	subctx.nrounds = ctx->nrounds;

	/* real_iv = req->iv[24..31] followed by req->iv[16..23] */
	memcpy(&real_iv[0], req->iv + 24, 8);
	memcpy(&real_iv[8], req->iv + 16, 8);
	return chacha_stream_xor(req, &subctx, real_iv, neon);
}
|
||||
|
||||
static int xchacha_arm(struct skcipher_request *req)
|
||||
{
|
||||
return do_xchacha(req, false);
|
||||
}
|
||||
|
||||
static int xchacha_neon(struct skcipher_request *req)
|
||||
{
|
||||
return do_xchacha(req, neon_usable());
|
||||
}
|
||||
|
||||
static struct skcipher_alg arm_algs[] = {
|
||||
{
|
||||
.base.cra_name = "chacha20",
|
||||
.base.cra_driver_name = "chacha20-arm",
|
||||
.base.cra_priority = 200,
|
||||
.base.cra_blocksize = 1,
|
||||
.base.cra_ctxsize = sizeof(struct chacha_ctx),
|
||||
.base.cra_module = THIS_MODULE,
|
||||
|
||||
.min_keysize = CHACHA_KEY_SIZE,
|
||||
.max_keysize = CHACHA_KEY_SIZE,
|
||||
.ivsize = CHACHA_IV_SIZE,
|
||||
.chunksize = CHACHA_BLOCK_SIZE,
|
||||
.setkey = chacha20_setkey,
|
||||
.encrypt = chacha_arm,
|
||||
.decrypt = chacha_arm,
|
||||
}, {
|
||||
.base.cra_name = "xchacha20",
|
||||
.base.cra_driver_name = "xchacha20-arm",
|
||||
.base.cra_priority = 200,
|
||||
.base.cra_blocksize = 1,
|
||||
.base.cra_ctxsize = sizeof(struct chacha_ctx),
|
||||
.base.cra_module = THIS_MODULE,
|
||||
|
||||
.min_keysize = CHACHA_KEY_SIZE,
|
||||
.max_keysize = CHACHA_KEY_SIZE,
|
||||
.ivsize = XCHACHA_IV_SIZE,
|
||||
.chunksize = CHACHA_BLOCK_SIZE,
|
||||
.setkey = chacha20_setkey,
|
||||
.encrypt = xchacha_arm,
|
||||
.decrypt = xchacha_arm,
|
||||
}, {
|
||||
.base.cra_name = "xchacha12",
|
||||
.base.cra_driver_name = "xchacha12-arm",
|
||||
.base.cra_priority = 200,
|
||||
.base.cra_blocksize = 1,
|
||||
.base.cra_ctxsize = sizeof(struct chacha_ctx),
|
||||
.base.cra_module = THIS_MODULE,
|
||||
|
||||
.min_keysize = CHACHA_KEY_SIZE,
|
||||
.max_keysize = CHACHA_KEY_SIZE,
|
||||
.ivsize = XCHACHA_IV_SIZE,
|
||||
.chunksize = CHACHA_BLOCK_SIZE,
|
||||
.setkey = chacha12_setkey,
|
||||
.encrypt = xchacha_arm,
|
||||
.decrypt = xchacha_arm,
|
||||
},
|
||||
};
|
||||
|
||||
static struct skcipher_alg neon_algs[] = {
|
||||
{
|
||||
.base.cra_name = "chacha20",
|
||||
.base.cra_driver_name = "chacha20-neon",
|
||||
.base.cra_priority = 300,
|
||||
.base.cra_blocksize = 1,
|
||||
.base.cra_ctxsize = sizeof(struct chacha_ctx),
|
||||
.base.cra_module = THIS_MODULE,
|
||||
|
||||
.min_keysize = CHACHA_KEY_SIZE,
|
||||
.max_keysize = CHACHA_KEY_SIZE,
|
||||
.ivsize = CHACHA_IV_SIZE,
|
||||
.chunksize = CHACHA_BLOCK_SIZE,
|
||||
.walksize = 4 * CHACHA_BLOCK_SIZE,
|
||||
.setkey = chacha20_setkey,
|
||||
.encrypt = chacha_neon,
|
||||
.decrypt = chacha_neon,
|
||||
}, {
|
||||
.base.cra_name = "xchacha20",
|
||||
.base.cra_driver_name = "xchacha20-neon",
|
||||
.base.cra_priority = 300,
|
||||
.base.cra_blocksize = 1,
|
||||
.base.cra_ctxsize = sizeof(struct chacha_ctx),
|
||||
.base.cra_module = THIS_MODULE,
|
||||
|
||||
.min_keysize = CHACHA_KEY_SIZE,
|
||||
.max_keysize = CHACHA_KEY_SIZE,
|
||||
.ivsize = XCHACHA_IV_SIZE,
|
||||
.chunksize = CHACHA_BLOCK_SIZE,
|
||||
.walksize = 4 * CHACHA_BLOCK_SIZE,
|
||||
.setkey = chacha20_setkey,
|
||||
.encrypt = xchacha_neon,
|
||||
.decrypt = xchacha_neon,
|
||||
}, {
|
||||
.base.cra_name = "xchacha12",
|
||||
.base.cra_driver_name = "xchacha12-neon",
|
||||
.base.cra_priority = 300,
|
||||
.base.cra_blocksize = 1,
|
||||
.base.cra_ctxsize = sizeof(struct chacha_ctx),
|
||||
.base.cra_module = THIS_MODULE,
|
||||
|
||||
.min_keysize = CHACHA_KEY_SIZE,
|
||||
.max_keysize = CHACHA_KEY_SIZE,
|
||||
.ivsize = XCHACHA_IV_SIZE,
|
||||
.chunksize = CHACHA_BLOCK_SIZE,
|
||||
.walksize = 4 * CHACHA_BLOCK_SIZE,
|
||||
.setkey = chacha12_setkey,
|
||||
.encrypt = xchacha_neon,
|
||||
.decrypt = xchacha_neon,
|
||||
}
|
||||
};
|
||||
|
||||
/*
 * Module init: register the scalar algorithms, then, if kernel-mode NEON is
 * configured and the CPU reports NEON, the NEON ones as well.
 *
 * Returns 0 on success or the error from crypto_register_skciphers().  If the
 * NEON registration fails, the scalar algorithms are unregistered again.
 */
static int __init chacha_simd_mod_init(void)
{
	int ret;
	int n;

	ret = crypto_register_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
	if (ret)
		return ret;

	/* Nothing more to do without NEON; ret is 0 at this point. */
	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !(elf_hwcap & HWCAP_NEON))
		return ret;

	switch (read_cpuid_part()) {
	case ARM_CPU_PART_CORTEX_A7:
	case ARM_CPU_PART_CORTEX_A5:
		/*
		 * The Cortex-A7 and Cortex-A5 do not perform well with the
		 * NEON implementation, but do very well with the scalar one
		 * and use less power.  Keep the NEON algorithms available,
		 * but at priority 0, and leave the use_neon key disabled.
		 */
		for (n = 0; n < ARRAY_SIZE(neon_algs); n++)
			neon_algs[n].base.cra_priority = 0;
		break;
	default:
		static_branch_enable(&use_neon);
		break;
	}

	ret = crypto_register_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
	if (ret)
		crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));

	return ret;
}
|
||||
|
||||
/*
 * Module exit: unregister the scalar algorithms, and the NEON ones under the
 * same condition that module init used to register them.
 */
static void __exit chacha_simd_mod_fini(void)
{
	const bool have_neon = IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
			       (elf_hwcap & HWCAP_NEON);

	crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
	if (have_neon)
		crypto_unregister_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
}
|
||||
|
||||
module_init(chacha_simd_mod_init);
|
||||
module_exit(chacha_simd_mod_fini);
|
||||
|
||||
MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (scalar and NEON accelerated)");
|
||||
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
|
||||
MODULE_LICENSE("GPL v2");
|
||||
MODULE_ALIAS_CRYPTO("chacha20");
|
||||
MODULE_ALIAS_CRYPTO("chacha20-arm");
|
||||
MODULE_ALIAS_CRYPTO("xchacha20");
|
||||
MODULE_ALIAS_CRYPTO("xchacha20-arm");
|
||||
MODULE_ALIAS_CRYPTO("xchacha12");
|
||||
MODULE_ALIAS_CRYPTO("xchacha12-arm");
|
||||
#ifdef CONFIG_KERNEL_MODE_NEON
|
||||
MODULE_ALIAS_CRYPTO("chacha20-neon");
|
||||
MODULE_ALIAS_CRYPTO("xchacha20-neon");
|
||||
MODULE_ALIAS_CRYPTO("xchacha12-neon");
|
||||
#endif
|
||||
@@ -1,202 +0,0 @@
|
||||
/*
|
||||
* ARM NEON accelerated ChaCha and XChaCha stream ciphers,
|
||||
* including ChaCha20 (RFC7539)
|
||||
*
|
||||
* Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* Based on:
|
||||
* ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
|
||||
*
|
||||
* Copyright (C) 2015 Martin Willi
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*/
|
||||
|
||||
#include <crypto/algapi.h>
|
||||
#include <crypto/chacha.h>
|
||||
#include <crypto/internal/simd.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
#include <asm/hwcap.h>
|
||||
#include <asm/neon.h>
|
||||
#include <asm/simd.h>
|
||||
|
||||
asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
|
||||
int nrounds);
|
||||
asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
|
||||
int nrounds);
|
||||
asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
|
||||
|
||||
static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
|
||||
unsigned int bytes, int nrounds)
|
||||
{
|
||||
u8 buf[CHACHA_BLOCK_SIZE];
|
||||
|
||||
while (bytes >= CHACHA_BLOCK_SIZE * 4) {
|
||||
chacha_4block_xor_neon(state, dst, src, nrounds);
|
||||
bytes -= CHACHA_BLOCK_SIZE * 4;
|
||||
src += CHACHA_BLOCK_SIZE * 4;
|
||||
dst += CHACHA_BLOCK_SIZE * 4;
|
||||
state[12] += 4;
|
||||
}
|
||||
while (bytes >= CHACHA_BLOCK_SIZE) {
|
||||
chacha_block_xor_neon(state, dst, src, nrounds);
|
||||
bytes -= CHACHA_BLOCK_SIZE;
|
||||
src += CHACHA_BLOCK_SIZE;
|
||||
dst += CHACHA_BLOCK_SIZE;
|
||||
state[12]++;
|
||||
}
|
||||
if (bytes) {
|
||||
memcpy(buf, src, bytes);
|
||||
chacha_block_xor_neon(state, buf, buf, nrounds);
|
||||
memcpy(dst, buf, bytes);
|
||||
}
|
||||
}
|
||||
|
||||
static int chacha_neon_stream_xor(struct skcipher_request *req,
|
||||
const struct chacha_ctx *ctx, const u8 *iv)
|
||||
{
|
||||
struct skcipher_walk walk;
|
||||
u32 state[16];
|
||||
int err;
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, false);
|
||||
|
||||
crypto_chacha_init(state, ctx, iv);
|
||||
|
||||
while (walk.nbytes > 0) {
|
||||
unsigned int nbytes = walk.nbytes;
|
||||
|
||||
if (nbytes < walk.total)
|
||||
nbytes = round_down(nbytes, walk.stride);
|
||||
|
||||
kernel_neon_begin();
|
||||
chacha_doneon(state, walk.dst.virt.addr, walk.src.virt.addr,
|
||||
nbytes, ctx->nrounds);
|
||||
kernel_neon_end();
|
||||
err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int chacha_neon(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
|
||||
if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
|
||||
return crypto_chacha_crypt(req);
|
||||
|
||||
return chacha_neon_stream_xor(req, ctx, req->iv);
|
||||
}
|
||||
|
||||
static int xchacha_neon(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
struct chacha_ctx subctx;
|
||||
u32 state[16];
|
||||
u8 real_iv[16];
|
||||
|
||||
if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
|
||||
return crypto_xchacha_crypt(req);
|
||||
|
||||
crypto_chacha_init(state, ctx, req->iv);
|
||||
|
||||
kernel_neon_begin();
|
||||
hchacha_block_neon(state, subctx.key, ctx->nrounds);
|
||||
kernel_neon_end();
|
||||
subctx.nrounds = ctx->nrounds;
|
||||
|
||||
memcpy(&real_iv[0], req->iv + 24, 8);
|
||||
memcpy(&real_iv[8], req->iv + 16, 8);
|
||||
return chacha_neon_stream_xor(req, &subctx, real_iv);
|
||||
}
|
||||
|
||||
static struct skcipher_alg algs[] = {
|
||||
{
|
||||
.base.cra_name = "chacha20",
|
||||
.base.cra_driver_name = "chacha20-neon",
|
||||
.base.cra_priority = 300,
|
||||
.base.cra_blocksize = 1,
|
||||
.base.cra_ctxsize = sizeof(struct chacha_ctx),
|
||||
.base.cra_module = THIS_MODULE,
|
||||
|
||||
.min_keysize = CHACHA_KEY_SIZE,
|
||||
.max_keysize = CHACHA_KEY_SIZE,
|
||||
.ivsize = CHACHA_IV_SIZE,
|
||||
.chunksize = CHACHA_BLOCK_SIZE,
|
||||
.walksize = 4 * CHACHA_BLOCK_SIZE,
|
||||
.setkey = crypto_chacha20_setkey,
|
||||
.encrypt = chacha_neon,
|
||||
.decrypt = chacha_neon,
|
||||
}, {
|
||||
.base.cra_name = "xchacha20",
|
||||
.base.cra_driver_name = "xchacha20-neon",
|
||||
.base.cra_priority = 300,
|
||||
.base.cra_blocksize = 1,
|
||||
.base.cra_ctxsize = sizeof(struct chacha_ctx),
|
||||
.base.cra_module = THIS_MODULE,
|
||||
|
||||
.min_keysize = CHACHA_KEY_SIZE,
|
||||
.max_keysize = CHACHA_KEY_SIZE,
|
||||
.ivsize = XCHACHA_IV_SIZE,
|
||||
.chunksize = CHACHA_BLOCK_SIZE,
|
||||
.walksize = 4 * CHACHA_BLOCK_SIZE,
|
||||
.setkey = crypto_chacha20_setkey,
|
||||
.encrypt = xchacha_neon,
|
||||
.decrypt = xchacha_neon,
|
||||
}, {
|
||||
.base.cra_name = "xchacha12",
|
||||
.base.cra_driver_name = "xchacha12-neon",
|
||||
.base.cra_priority = 300,
|
||||
.base.cra_blocksize = 1,
|
||||
.base.cra_ctxsize = sizeof(struct chacha_ctx),
|
||||
.base.cra_module = THIS_MODULE,
|
||||
|
||||
.min_keysize = CHACHA_KEY_SIZE,
|
||||
.max_keysize = CHACHA_KEY_SIZE,
|
||||
.ivsize = XCHACHA_IV_SIZE,
|
||||
.chunksize = CHACHA_BLOCK_SIZE,
|
||||
.walksize = 4 * CHACHA_BLOCK_SIZE,
|
||||
.setkey = crypto_chacha12_setkey,
|
||||
.encrypt = xchacha_neon,
|
||||
.decrypt = xchacha_neon,
|
||||
}
|
||||
};
|
||||
|
||||
static int __init chacha_simd_mod_init(void)
|
||||
{
|
||||
if (!(elf_hwcap & HWCAP_NEON))
|
||||
return -ENODEV;
|
||||
|
||||
return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
|
||||
}
|
||||
|
||||
static void __exit chacha_simd_mod_fini(void)
|
||||
{
|
||||
crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
|
||||
}
|
||||
|
||||
module_init(chacha_simd_mod_init);
|
||||
module_exit(chacha_simd_mod_fini);
|
||||
|
||||
MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (NEON accelerated)");
|
||||
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
|
||||
MODULE_LICENSE("GPL v2");
|
||||
MODULE_ALIAS_CRYPTO("chacha20");
|
||||
MODULE_ALIAS_CRYPTO("chacha20-neon");
|
||||
MODULE_ALIAS_CRYPTO("xchacha20");
|
||||
MODULE_ALIAS_CRYPTO("xchacha20-neon");
|
||||
MODULE_ALIAS_CRYPTO("xchacha12");
|
||||
MODULE_ALIAS_CRYPTO("xchacha12-neon");
|
||||
460
arch/arm/crypto/chacha-scalar-core.S
Normal file
460
arch/arm/crypto/chacha-scalar-core.S
Normal file
@@ -0,0 +1,460 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* Copyright (C) 2018 Google, Inc.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/assembler.h>
|
||||
|
||||
/*
|
||||
* Design notes:
|
||||
*
|
||||
* 16 registers would be needed to hold the state matrix, but only 14 are
|
||||
* available because 'sp' and 'pc' cannot be used. So we spill the elements
|
||||
* (x8, x9) to the stack and swap them out with (x10, x11). This adds one
|
||||
* 'ldrd' and one 'strd' instruction per round.
|
||||
*
|
||||
* All rotates are performed using the implicit rotate operand accepted by the
|
||||
* 'add' and 'eor' instructions. This is faster than using explicit rotate
|
||||
* instructions. To make this work, we allow the values in the second and last
|
||||
* rows of the ChaCha state matrix (rows 'b' and 'd') to temporarily have the
|
||||
* wrong rotation amount. The rotation amount is then fixed up just in time
|
||||
* when the values are used. 'brot' is the number of bits the values in row 'b'
|
||||
* need to be rotated right to arrive at the correct values, and 'drot'
|
||||
* similarly for row 'd'. (brot, drot) start out as (0, 0) but we make it such
|
||||
* that they end up as (25, 24) after every round.
|
||||
*/
|
||||
|
||||
// ChaCha state registers
|
||||
X0 .req r0
|
||||
X1 .req r1
|
||||
X2 .req r2
|
||||
X3 .req r3
|
||||
X4 .req r4
|
||||
X5 .req r5
|
||||
X6 .req r6
|
||||
X7 .req r7
|
||||
X8_X10 .req r8 // shared by x8 and x10
|
||||
X9_X11 .req r9 // shared by x9 and x11
|
||||
X12 .req r10
|
||||
X13 .req r11
|
||||
X14 .req r12
|
||||
X15 .req r14
|
||||
|
||||
.macro __rev out, in, t0, t1, t2
|
||||
.if __LINUX_ARM_ARCH__ >= 6
|
||||
rev \out, \in
|
||||
.else
|
||||
lsl \t0, \in, #24
|
||||
and \t1, \in, #0xff00
|
||||
and \t2, \in, #0xff0000
|
||||
orr \out, \t0, \in, lsr #24
|
||||
orr \out, \out, \t1, lsl #8
|
||||
orr \out, \out, \t2, lsr #8
|
||||
.endif
|
||||
.endm
|
||||
|
||||
.macro _le32_bswap x, t0, t1, t2
|
||||
#ifdef __ARMEB__
|
||||
__rev \x, \x, \t0, \t1, \t2
|
||||
#endif
|
||||
.endm
|
||||
|
||||
.macro _le32_bswap_4x a, b, c, d, t0, t1, t2
|
||||
_le32_bswap \a, \t0, \t1, \t2
|
||||
_le32_bswap \b, \t0, \t1, \t2
|
||||
_le32_bswap \c, \t0, \t1, \t2
|
||||
_le32_bswap \d, \t0, \t1, \t2
|
||||
.endm
|
||||
|
||||
.macro __ldrd a, b, src, offset
|
||||
#if __LINUX_ARM_ARCH__ >= 6
|
||||
ldrd \a, \b, [\src, #\offset]
|
||||
#else
|
||||
ldr \a, [\src, #\offset]
|
||||
ldr \b, [\src, #\offset + 4]
|
||||
#endif
|
||||
.endm
|
||||
|
||||
.macro __strd a, b, dst, offset
|
||||
#if __LINUX_ARM_ARCH__ >= 6
|
||||
strd \a, \b, [\dst, #\offset]
|
||||
#else
|
||||
str \a, [\dst, #\offset]
|
||||
str \b, [\dst, #\offset + 4]
|
||||
#endif
|
||||
.endm
|
||||
|
||||
.macro _halfround a1, b1, c1, d1, a2, b2, c2, d2
|
||||
|
||||
// a += b; d ^= a; d = rol(d, 16);
|
||||
add \a1, \a1, \b1, ror #brot
|
||||
add \a2, \a2, \b2, ror #brot
|
||||
eor \d1, \a1, \d1, ror #drot
|
||||
eor \d2, \a2, \d2, ror #drot
|
||||
// drot == 32 - 16 == 16
|
||||
|
||||
// c += d; b ^= c; b = rol(b, 12);
|
||||
add \c1, \c1, \d1, ror #16
|
||||
add \c2, \c2, \d2, ror #16
|
||||
eor \b1, \c1, \b1, ror #brot
|
||||
eor \b2, \c2, \b2, ror #brot
|
||||
// brot == 32 - 12 == 20
|
||||
|
||||
// a += b; d ^= a; d = rol(d, 8);
|
||||
add \a1, \a1, \b1, ror #20
|
||||
add \a2, \a2, \b2, ror #20
|
||||
eor \d1, \a1, \d1, ror #16
|
||||
eor \d2, \a2, \d2, ror #16
|
||||
// drot == 32 - 8 == 24
|
||||
|
||||
// c += d; b ^= c; b = rol(b, 7);
|
||||
add \c1, \c1, \d1, ror #24
|
||||
add \c2, \c2, \d2, ror #24
|
||||
eor \b1, \c1, \b1, ror #20
|
||||
eor \b2, \c2, \b2, ror #20
|
||||
// brot == 32 - 7 == 25
|
||||
.endm
|
||||
|
||||
.macro _doubleround
|
||||
|
||||
// column round
|
||||
|
||||
// quarterrounds: (x0, x4, x8, x12) and (x1, x5, x9, x13)
|
||||
_halfround X0, X4, X8_X10, X12, X1, X5, X9_X11, X13
|
||||
|
||||
// save (x8, x9); restore (x10, x11)
|
||||
__strd X8_X10, X9_X11, sp, 0
|
||||
__ldrd X8_X10, X9_X11, sp, 8
|
||||
|
||||
// quarterrounds: (x2, x6, x10, x14) and (x3, x7, x11, x15)
|
||||
_halfround X2, X6, X8_X10, X14, X3, X7, X9_X11, X15
|
||||
|
||||
.set brot, 25
|
||||
.set drot, 24
|
||||
|
||||
// diagonal round
|
||||
|
||||
// quarterrounds: (x0, x5, x10, x15) and (x1, x6, x11, x12)
|
||||
_halfround X0, X5, X8_X10, X15, X1, X6, X9_X11, X12
|
||||
|
||||
// save (x10, x11); restore (x8, x9)
|
||||
__strd X8_X10, X9_X11, sp, 8
|
||||
__ldrd X8_X10, X9_X11, sp, 0
|
||||
|
||||
// quarterrounds: (x2, x7, x8, x13) and (x3, x4, x9, x14)
|
||||
_halfround X2, X7, X8_X10, X13, X3, X4, X9_X11, X14
|
||||
.endm
|
||||
|
||||
.macro _chacha_permute nrounds
|
||||
.set brot, 0
|
||||
.set drot, 0
|
||||
.rept \nrounds / 2
|
||||
_doubleround
|
||||
.endr
|
||||
.endm
|
||||
|
||||
.macro _chacha nrounds
|
||||
|
||||
.Lnext_block\@:
|
||||
// Stack: unused0-unused1 x10-x11 x0-x15 OUT IN LEN
|
||||
// Registers contain x0-x9,x12-x15.
|
||||
|
||||
// Do the core ChaCha permutation to update x0-x15.
|
||||
_chacha_permute \nrounds
|
||||
|
||||
add sp, #8
|
||||
// Stack: x10-x11 orig_x0-orig_x15 OUT IN LEN
|
||||
// Registers contain x0-x9,x12-x15.
|
||||
// x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'.
|
||||
|
||||
// Free up some registers (r8-r12,r14) by pushing (x8-x9,x12-x15).
|
||||
push {X8_X10, X9_X11, X12, X13, X14, X15}
|
||||
|
||||
// Load (OUT, IN, LEN).
|
||||
ldr r14, [sp, #96]
|
||||
ldr r12, [sp, #100]
|
||||
ldr r11, [sp, #104]
|
||||
|
||||
orr r10, r14, r12
|
||||
|
||||
// Use slow path if fewer than 64 bytes remain.
|
||||
cmp r11, #64
|
||||
blt .Lxor_slowpath\@
|
||||
|
||||
// Use slow path if IN and/or OUT isn't 4-byte aligned. Needed even on
|
||||
// ARMv6+, since ldmia and stmia (used below) still require alignment.
|
||||
tst r10, #3
|
||||
bne .Lxor_slowpath\@
|
||||
|
||||
// Fast path: XOR 64 bytes of aligned data.
|
||||
|
||||
// Stack: x8-x9 x12-x15 x10-x11 orig_x0-orig_x15 OUT IN LEN
|
||||
// Registers: r0-r7 are x0-x7; r8-r11 are free; r12 is IN; r14 is OUT.
|
||||
// x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'.
|
||||
|
||||
// x0-x3
|
||||
__ldrd r8, r9, sp, 32
|
||||
__ldrd r10, r11, sp, 40
|
||||
add X0, X0, r8
|
||||
add X1, X1, r9
|
||||
add X2, X2, r10
|
||||
add X3, X3, r11
|
||||
_le32_bswap_4x X0, X1, X2, X3, r8, r9, r10
|
||||
ldmia r12!, {r8-r11}
|
||||
eor X0, X0, r8
|
||||
eor X1, X1, r9
|
||||
eor X2, X2, r10
|
||||
eor X3, X3, r11
|
||||
stmia r14!, {X0-X3}
|
||||
|
||||
// x4-x7
|
||||
__ldrd r8, r9, sp, 48
|
||||
__ldrd r10, r11, sp, 56
|
||||
add X4, r8, X4, ror #brot
|
||||
add X5, r9, X5, ror #brot
|
||||
ldmia r12!, {X0-X3}
|
||||
add X6, r10, X6, ror #brot
|
||||
add X7, r11, X7, ror #brot
|
||||
_le32_bswap_4x X4, X5, X6, X7, r8, r9, r10
|
||||
eor X4, X4, X0
|
||||
eor X5, X5, X1
|
||||
eor X6, X6, X2
|
||||
eor X7, X7, X3
|
||||
stmia r14!, {X4-X7}
|
||||
|
||||
// x8-x15
|
||||
pop {r0-r7} // (x8-x9,x12-x15,x10-x11)
|
||||
__ldrd r8, r9, sp, 32
|
||||
__ldrd r10, r11, sp, 40
|
||||
add r0, r0, r8 // x8
|
||||
add r1, r1, r9 // x9
|
||||
add r6, r6, r10 // x10
|
||||
add r7, r7, r11 // x11
|
||||
_le32_bswap_4x r0, r1, r6, r7, r8, r9, r10
|
||||
ldmia r12!, {r8-r11}
|
||||
eor r0, r0, r8 // x8
|
||||
eor r1, r1, r9 // x9
|
||||
eor r6, r6, r10 // x10
|
||||
eor r7, r7, r11 // x11
|
||||
stmia r14!, {r0,r1,r6,r7}
|
||||
ldmia r12!, {r0,r1,r6,r7}
|
||||
__ldrd r8, r9, sp, 48
|
||||
__ldrd r10, r11, sp, 56
|
||||
add r2, r8, r2, ror #drot // x12
|
||||
add r3, r9, r3, ror #drot // x13
|
||||
add r4, r10, r4, ror #drot // x14
|
||||
add r5, r11, r5, ror #drot // x15
|
||||
_le32_bswap_4x r2, r3, r4, r5, r9, r10, r11
|
||||
ldr r9, [sp, #72] // load LEN
|
||||
eor r2, r2, r0 // x12
|
||||
eor r3, r3, r1 // x13
|
||||
eor r4, r4, r6 // x14
|
||||
eor r5, r5, r7 // x15
|
||||
subs r9, #64 // decrement and check LEN
|
||||
stmia r14!, {r2-r5}
|
||||
|
||||
beq .Ldone\@
|
||||
|
||||
.Lprepare_for_next_block\@:
|
||||
|
||||
// Stack: x0-x15 OUT IN LEN
|
||||
|
||||
// Increment block counter (x12)
|
||||
add r8, #1
|
||||
|
||||
// Store updated (OUT, IN, LEN)
|
||||
str r14, [sp, #64]
|
||||
str r12, [sp, #68]
|
||||
str r9, [sp, #72]
|
||||
|
||||
mov r14, sp
|
||||
|
||||
// Store updated block counter (x12)
|
||||
str r8, [sp, #48]
|
||||
|
||||
sub sp, #16
|
||||
|
||||
// Reload state and do next block
|
||||
ldmia r14!, {r0-r11} // load x0-x11
|
||||
__strd r10, r11, sp, 8 // store x10-x11 before state
|
||||
ldmia r14, {r10-r12,r14} // load x12-x15
|
||||
b .Lnext_block\@
|
||||
|
||||
.Lxor_slowpath\@:
|
||||
// Slow path: < 64 bytes remaining, or unaligned input or output buffer.
|
||||
// We handle it by storing the 64 bytes of keystream to the stack, then
|
||||
// XOR-ing the needed portion with the data.
|
||||
|
||||
// Allocate keystream buffer
|
||||
sub sp, #64
|
||||
mov r14, sp
|
||||
|
||||
// Stack: ks0-ks15 x8-x9 x12-x15 x10-x11 orig_x0-orig_x15 OUT IN LEN
|
||||
// Registers: r0-r7 are x0-x7; r8-r11 are free; r12 is IN; r14 is &ks0.
|
||||
// x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'.
|
||||
|
||||
// Save keystream for x0-x3
|
||||
__ldrd r8, r9, sp, 96
|
||||
__ldrd r10, r11, sp, 104
|
||||
add X0, X0, r8
|
||||
add X1, X1, r9
|
||||
add X2, X2, r10
|
||||
add X3, X3, r11
|
||||
_le32_bswap_4x X0, X1, X2, X3, r8, r9, r10
|
||||
stmia r14!, {X0-X3}
|
||||
|
||||
// Save keystream for x4-x7
|
||||
__ldrd r8, r9, sp, 112
|
||||
__ldrd r10, r11, sp, 120
|
||||
add X4, r8, X4, ror #brot
|
||||
add X5, r9, X5, ror #brot
|
||||
add X6, r10, X6, ror #brot
|
||||
add X7, r11, X7, ror #brot
|
||||
_le32_bswap_4x X4, X5, X6, X7, r8, r9, r10
|
||||
add r8, sp, #64
|
||||
stmia r14!, {X4-X7}
|
||||
|
||||
// Save keystream for x8-x15
|
||||
ldm r8, {r0-r7} // (x8-x9,x12-x15,x10-x11)
|
||||
__ldrd r8, r9, sp, 128
|
||||
__ldrd r10, r11, sp, 136
|
||||
add r0, r0, r8 // x8
|
||||
add r1, r1, r9 // x9
|
||||
add r6, r6, r10 // x10
|
||||
add r7, r7, r11 // x11
|
||||
_le32_bswap_4x r0, r1, r6, r7, r8, r9, r10
|
||||
stmia r14!, {r0,r1,r6,r7}
|
||||
__ldrd r8, r9, sp, 144
|
||||
__ldrd r10, r11, sp, 152
|
||||
add r2, r8, r2, ror #drot // x12
|
||||
add r3, r9, r3, ror #drot // x13
|
||||
add r4, r10, r4, ror #drot // x14
|
||||
add r5, r11, r5, ror #drot // x15
|
||||
_le32_bswap_4x r2, r3, r4, r5, r9, r10, r11
|
||||
stmia r14, {r2-r5}
|
||||
|
||||
// Stack: ks0-ks15 unused0-unused7 x0-x15 OUT IN LEN
|
||||
// Registers: r8 is block counter, r12 is IN.
|
||||
|
||||
ldr r9, [sp, #168] // LEN
|
||||
ldr r14, [sp, #160] // OUT
|
||||
cmp r9, #64
|
||||
mov r0, sp
|
||||
movle r1, r9
|
||||
movgt r1, #64
|
||||
// r1 is number of bytes to XOR, in range [1, 64]
|
||||
|
||||
.if __LINUX_ARM_ARCH__ < 6
|
||||
orr r2, r12, r14
|
||||
tst r2, #3 // IN or OUT misaligned?
|
||||
bne .Lxor_next_byte\@
|
||||
.endif
|
||||
|
||||
// XOR a word at a time
|
||||
.rept 16
|
||||
subs r1, #4
|
||||
blt .Lxor_words_done\@
|
||||
ldr r2, [r12], #4
|
||||
ldr r3, [r0], #4
|
||||
eor r2, r2, r3
|
||||
str r2, [r14], #4
|
||||
.endr
|
||||
b .Lxor_slowpath_done\@
|
||||
.Lxor_words_done\@:
|
||||
ands r1, r1, #3
|
||||
beq .Lxor_slowpath_done\@
|
||||
|
||||
// XOR a byte at a time
|
||||
.Lxor_next_byte\@:
|
||||
ldrb r2, [r12], #1
|
||||
ldrb r3, [r0], #1
|
||||
eor r2, r2, r3
|
||||
strb r2, [r14], #1
|
||||
subs r1, #1
|
||||
bne .Lxor_next_byte\@
|
||||
|
||||
.Lxor_slowpath_done\@:
|
||||
subs r9, #64
|
||||
add sp, #96
|
||||
bgt .Lprepare_for_next_block\@
|
||||
|
||||
.Ldone\@:
|
||||
.endm // _chacha
|
||||
|
||||
/*
 * void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
 *		     const u32 *state, int nrounds);
 *
 * In:   r0 = dst, r1 = src, r2 = bytes, r3 = state, [sp] = nrounds
 *
 * Returns at once when bytes == 0.  nrounds == 12 selects the 12-round
 * expansion of _chacha; any other value runs the 20-round one.  The state is
 * only read: the code works on a stack copy, so the caller's block counter is
 * not advanced.  r4-r11 are saved on entry and restored on return.
 */
ENTRY(chacha_doarm)
	cmp		r2, #0			// len == 0?
	reteq		lr

	ldr		ip, [sp]		// nrounds (stack argument)
	cmp		ip, #12			// flags are consumed by 'beq 1f' below

	push		{r0-r2,r4-r11,lr}

	// Copy state x0-x15 onto the stack, with an extra copy of x10-x11
	// just before it.  Nothing below changes the flags set by 'cmp'.

	add		X12, r3, #48
	ldm		X12, {X12,X13,X14,X15}
	push		{X12,X13,X14,X15}
	sub		sp, sp, #64

	__ldrd		X8_X10, X9_X11, r3, 40
	__strd		X8_X10, X9_X11, sp, 8
	__strd		X8_X10, X9_X11, sp, 56
	ldm		r3, {X0-X9_X11}
	__strd		X0, X1, sp, 16
	__strd		X2, X3, sp, 24
	__strd		X4, X5, sp, 32
	__strd		X6, X7, sp, 40
	__strd		X8_X10, X9_X11, sp, 48

	beq		1f
	_chacha		20

	// Drop the state copy (64 bytes) and the saved r0-r2 (12 bytes).
0:	add		sp, #76
	pop		{r4-r11, pc}

1:	_chacha		12
	b		0b
ENDPROC(chacha_doarm)
|
||||
|
||||
/*
 * void hchacha_block_arm(const u32 state[16], u32 out[8], int nrounds);
 *
 * In:   r0 = state, r1 = out, r2 = nrounds
 *
 * Runs the ChaCha permutation over the 16 state words and stores words
 * x0-x3 and x12-x15 of the result to 'out'.  nrounds == 12 selects the
 * 12-round permutation; any other value runs the 20-round one.  r4-r11 are
 * saved on entry and restored on return.
 */
ENTRY(hchacha_block_arm)
	push		{r1,r4-r11,lr}		// r1 ('out') is reloaded below

	cmp		r2, #12			// ChaCha12 ?

	mov		r14, r0
	ldmia		r14!, {r0-r11}		// load x0-x11
	push		{r10-r11}		// store x10-x11 to stack
	ldm		r14, {r10-r12,r14}	// load x12-x15
	sub		sp, #8			// scratch slot for (x8, x9)

	beq		1f
	_chacha_permute	20

	// Skip over (unused0-unused1, x10-x11)
0:	add		sp, #16

	// Fix up rotations of x12-x15.  'drot' is the assembly-time value
	// left by the permutation expanded above; the shared tail relies on
	// both expansions ending with the same value.
	ror		X12, X12, #drot
	ror		X13, X13, #drot
	  pop		{r4}			// load 'out'
	ror		X14, X14, #drot
	ror		X15, X15, #drot

	// Store (x0-x3,x12-x15) to 'out'
	stm		r4, {X0,X1,X2,X3,X12,X13,X14,X15}

	pop		{r4-r11,pc}

1:	_chacha_permute	12
	b		0b
ENDPROC(hchacha_block_arm)
|
||||
@@ -72,7 +72,7 @@
|
||||
#endif
|
||||
|
||||
.text
|
||||
.arch armv7-a
|
||||
.arch armv8-a
|
||||
.fpu crypto-neon-fp-armv8
|
||||
|
||||
init_crc .req r0
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user