Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

Pull crypto updates from Herbert Xu:
 "API:
   - Fix dcache flushing crash in skcipher.
   - Add hash finup self-tests.
   - Reschedule during speed tests.

  Algorithms:
   - Remove insecure vmac and replace it with vmac64.
   - Add public key verification for DH/ECDH.

  Drivers:
   - Decrease priority of sha-mb on x86.
   - Improve NEON latency/throughput on ARM64.
   - Add md5/sha384/sha512/des/3des to inside-secure.
   - Support eip197d in inside-secure.
   - Only register algorithms supported by the host in virtio.
   - Add cts and remove incompatible cts1 from ccree.
   - Add hisilicon SEC security accelerator driver.
   - Replace msm hwrng driver with qcom pseudo rng driver.

  Misc:
   - Centralize CRC polynomials"

* 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (121 commits)
  crypto: arm64/ghash-ce - implement 4-way aggregation
  crypto: arm64/ghash-ce - replace NEON yield check with block limit
  crypto: hisilicon - sec_send_request() can be static
  lib/mpi: remove redundant variable esign
  crypto: arm64/aes-ce-gcm - don't reload key schedule if avoidable
  crypto: arm64/aes-ce-gcm - implement 2-way aggregation
  crypto: arm64/aes-ce-gcm - operate on two input blocks at a time
  crypto: dh - make crypto_dh_encode_key() make robust
  crypto: dh - fix calculating encoded key size
  crypto: ccp - Check for NULL PSP pointer at module unload
  crypto: arm/chacha20 - always use vrev for 16-bit rotates
  crypto: ccree - allow bigger than sector XTS op
  crypto: ccree - zero all of request ctx before use
  crypto: ccree - remove cipher ivgen left overs
  crypto: ccree - drop useless type flag during reg
  crypto: ablkcipher - fix crash flushing dcache in error path
  crypto: blkcipher - fix crash flushing dcache in error path
  crypto: skcipher - fix crash flushing dcache in error path
  crypto: skcipher - remove unnecessary setting of walk->nbytes
  crypto: scatterwalk - remove scatterwalk_samebuf()
  ...
This commit is contained in:
Linus Torvalds
2018-08-15 16:01:47 -07:00
174 changed files with 6690 additions and 2013 deletions
+1 -1
View File
@@ -162,7 +162,7 @@ Code Example For Use of Operational State Memory With SHASH
char *hash_alg_name = "sha1-padlock-nano";
int ret;
alg = crypto_alloc_shash(hash_alg_name, CRYPTO_ALG_TYPE_SHASH, 0);
alg = crypto_alloc_shash(hash_alg_name, 0, 0);
if (IS_ERR(alg)) {
pr_info("can't alloc alg %s\n", hash_alg_name);
return PTR_ERR(alg);
@@ -0,0 +1,67 @@
* Hisilicon hip07 Security Accelerator (SEC)
Required properties:
- compatible: Must contain one of
- "hisilicon,hip06-sec"
- "hisilicon,hip07-sec"
- reg: Memory addresses and lengths of the memory regions through which
this device is controlled.
Region 0 has registers to control the backend processing engines.
Region 1 has registers for functionality common to all queues.
Regions 2-18 have registers for the 16 individual queues which are isolated
both in hardware and within the driver.
- interrupts: Interrupt specifiers.
Refer to interrupt-controller/interrupts.txt for generic interrupt client node
bindings.
Interrupt 0 is for the SEC unit error queue.
Interrupt 2N + 1 is the completion interrupt for queue N.
Interrupt 2N + 2 is the error interrupt for queue N.
- dma-coherent: The driver assumes coherent dma is possible.
Optional properties:
- iommus: The SEC units are behind smmu-v3 iommus.
Refer to iommu/arm,smmu-v3.txt for more information.
Example:
p1_sec_a: crypto@400,d2000000 {
compatible = "hisilicon,hip07-sec";
reg = <0x400 0xd0000000 0x0 0x10000
0x400 0xd2000000 0x0 0x10000
0x400 0xd2010000 0x0 0x10000
0x400 0xd2020000 0x0 0x10000
0x400 0xd2030000 0x0 0x10000
0x400 0xd2040000 0x0 0x10000
0x400 0xd2050000 0x0 0x10000
0x400 0xd2060000 0x0 0x10000
0x400 0xd2070000 0x0 0x10000
0x400 0xd2080000 0x0 0x10000
0x400 0xd2090000 0x0 0x10000
0x400 0xd20a0000 0x0 0x10000
0x400 0xd20b0000 0x0 0x10000
0x400 0xd20c0000 0x0 0x10000
0x400 0xd20d0000 0x0 0x10000
0x400 0xd20e0000 0x0 0x10000
0x400 0xd20f0000 0x0 0x10000
0x400 0xd2100000 0x0 0x10000>;
interrupt-parent = <&p1_mbigen_sec_a>;
iommus = <&p1_smmu_alg_a 0x600>;
dma-coherent;
interrupts = <576 4>,
<577 1>, <578 4>,
<579 1>, <580 4>,
<581 1>, <582 4>,
<583 1>, <584 4>,
<585 1>, <586 4>,
<587 1>, <588 4>,
<589 1>, <590 4>,
<591 1>, <592 4>,
<593 1>, <594 4>,
<595 1>, <596 4>,
<597 1>, <598 4>,
<599 1>, <600 4>,
<601 1>, <602 4>,
<603 1>, <604 4>,
<605 1>, <606 4>,
<607 1>, <608 4>;
};
@@ -1,8 +1,9 @@
Inside Secure SafeXcel cryptographic engine
Required properties:
- compatible: Should be "inside-secure,safexcel-eip197" or
"inside-secure,safexcel-eip97".
- compatible: Should be "inside-secure,safexcel-eip197b",
"inside-secure,safexcel-eip197d" or
"inside-secure,safexcel-eip97ies".
- reg: Base physical address of the engine and length of memory mapped region.
- interrupts: Interrupt numbers for the rings and engine.
- interrupt-names: Should be "ring0", "ring1", "ring2", "ring3", "eip", "mem".
@@ -14,10 +15,18 @@ Optional properties:
name must be "core" for the first clock and "reg" for
the second one.
Backward compatibility:
Two compatibles are kept for backward compatibility, but shouldn't be used for
new submissions:
- "inside-secure,safexcel-eip197" is equivalent to
"inside-secure,safexcel-eip197b".
- "inside-secure,safexcel-eip97" is equivalent to
"inside-secure,safexcel-eip97ies".
Example:
crypto: crypto@800000 {
compatible = "inside-secure,safexcel-eip197";
compatible = "inside-secure,safexcel-eip197b";
reg = <0x800000 0x200000>;
interrupts = <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 54 IRQ_TYPE_LEVEL_HIGH>,
@@ -2,7 +2,9 @@ Qualcomm MSM pseudo random number generator.
Required properties:
- compatible : should be "qcom,prng"
- compatible : should be "qcom,prng" for 8916 etc
: should be "qcom,prng-ee" for 8996 and later using EE
(Execution Environment) slice of prng
- reg : specifies base physical address and size of the registers map
- clocks : phandle to clock-controller plus clock-specifier pair
- clock-names : "core" clocks all registers, FIFO and circuits in PRNG IP block
+1 -1
View File
@@ -7364,7 +7364,7 @@ M: Megha Dey <megha.dey@linux.intel.com>
R: Tim Chen <tim.c.chen@linux.intel.com>
L: linux-crypto@vger.kernel.org
S: Supported
F: arch/x86/crypto/sha*-mb
F: arch/x86/crypto/sha*-mb/
F: crypto/mcryptd.c
INTEL TELEMETRY DRIVER
+4 -6
View File
@@ -51,9 +51,8 @@ ENTRY(chacha20_block_xor_neon)
.Ldoubleround:
// x0 += x1, x3 = rotl32(x3 ^ x0, 16)
vadd.i32 q0, q0, q1
veor q4, q3, q0
vshl.u32 q3, q4, #16
vsri.u32 q3, q4, #16
veor q3, q3, q0
vrev32.16 q3, q3
// x2 += x3, x1 = rotl32(x1 ^ x2, 12)
vadd.i32 q2, q2, q3
@@ -82,9 +81,8 @@ ENTRY(chacha20_block_xor_neon)
// x0 += x1, x3 = rotl32(x3 ^ x0, 16)
vadd.i32 q0, q0, q1
veor q4, q3, q0
vshl.u32 q3, q4, #16
vsri.u32 q3, q4, #16
veor q3, q3, q0
vrev32.16 q3, q3
// x2 += x3, x1 = rotl32(x1 ^ x2, 12)
vadd.i32 q2, q2, q3
+2 -3
View File
@@ -152,7 +152,7 @@ static struct shash_alg ghash_alg = {
.cra_name = "__ghash",
.cra_driver_name = "__driver-ghash-ce",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_SHASH | CRYPTO_ALG_INTERNAL,
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = GHASH_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct ghash_key),
.cra_module = THIS_MODULE,
@@ -308,9 +308,8 @@ static struct ahash_alg ghash_async_alg = {
.cra_name = "ghash",
.cra_driver_name = "ghash-ce",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC,
.cra_flags = CRYPTO_ALG_ASYNC,
.cra_blocksize = GHASH_BLOCK_SIZE,
.cra_type = &crypto_ahash_type,
.cra_ctxsize = sizeof(struct ghash_async_ctx),
.cra_module = THIS_MODULE,
.cra_init = ghash_async_init_tfm,
-1
View File
@@ -75,7 +75,6 @@ static struct shash_alg alg = {
.cra_name = "sha1",
.cra_driver_name = "sha1-ce",
.cra_priority = 200,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA1_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
-1
View File
@@ -67,7 +67,6 @@ static struct shash_alg alg = {
.cra_name = "sha1",
.cra_driver_name= "sha1-asm",
.cra_priority = 150,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA1_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
-1
View File
@@ -83,7 +83,6 @@ static struct shash_alg alg = {
.cra_name = "sha1",
.cra_driver_name = "sha1-neon",
.cra_priority = 250,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA1_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
-2
View File
@@ -78,7 +78,6 @@ static struct shash_alg algs[] = { {
.cra_name = "sha224",
.cra_driver_name = "sha224-ce",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA256_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
@@ -93,7 +92,6 @@ static struct shash_alg algs[] = { {
.cra_name = "sha256",
.cra_driver_name = "sha256-ce",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA256_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
-2
View File
@@ -71,7 +71,6 @@ static struct shash_alg algs[] = { {
.cra_name = "sha256",
.cra_driver_name = "sha256-asm",
.cra_priority = 150,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA256_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
@@ -86,7 +85,6 @@ static struct shash_alg algs[] = { {
.cra_name = "sha224",
.cra_driver_name = "sha224-asm",
.cra_priority = 150,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA224_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
-2
View File
@@ -79,7 +79,6 @@ struct shash_alg sha256_neon_algs[] = { {
.cra_name = "sha256",
.cra_driver_name = "sha256-neon",
.cra_priority = 250,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA256_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
@@ -94,7 +93,6 @@ struct shash_alg sha256_neon_algs[] = { {
.cra_name = "sha224",
.cra_driver_name = "sha224-neon",
.cra_priority = 250,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA224_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
-2
View File
@@ -63,7 +63,6 @@ static struct shash_alg sha512_arm_algs[] = { {
.cra_name = "sha384",
.cra_driver_name = "sha384-arm",
.cra_priority = 250,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA512_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
@@ -78,7 +77,6 @@ static struct shash_alg sha512_arm_algs[] = { {
.cra_name = "sha512",
.cra_driver_name = "sha512-arm",
.cra_priority = 250,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA512_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
-2
View File
@@ -75,7 +75,6 @@ struct shash_alg sha512_neon_algs[] = { {
.cra_name = "sha384",
.cra_driver_name = "sha384-neon",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA384_BLOCK_SIZE,
.cra_module = THIS_MODULE,
@@ -91,7 +90,6 @@ struct shash_alg sha512_neon_algs[] = { {
.cra_name = "sha512",
.cra_driver_name = "sha512-neon",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA512_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
+284
View File
@@ -1049,7 +1049,74 @@
num-pins = <2>;
};
};
p0_mbigen_alg_a:interrupt-controller@d0080000 {
compatible = "hisilicon,mbigen-v2";
reg = <0x0 0xd0080000 0x0 0x10000>;
p0_mbigen_sec_a: intc_sec {
msi-parent = <&p0_its_dsa_a 0x40400>;
interrupt-controller;
#interrupt-cells = <2>;
num-pins = <33>;
};
p0_mbigen_smmu_alg_a: intc_smmu_alg {
msi-parent = <&p0_its_dsa_a 0x40b1b>;
interrupt-controller;
#interrupt-cells = <2>;
num-pins = <3>;
};
};
p0_mbigen_alg_b:interrupt-controller@8,d0080000 {
compatible = "hisilicon,mbigen-v2";
reg = <0x8 0xd0080000 0x0 0x10000>;
p0_mbigen_sec_b: intc_sec {
msi-parent = <&p0_its_dsa_b 0x42400>;
interrupt-controller;
#interrupt-cells = <2>;
num-pins = <33>;
};
p0_mbigen_smmu_alg_b: intc_smmu_alg {
msi-parent = <&p0_its_dsa_b 0x42b1b>;
interrupt-controller;
#interrupt-cells = <2>;
num-pins = <3>;
};
};
p1_mbigen_alg_a:interrupt-controller@400,d0080000 {
compatible = "hisilicon,mbigen-v2";
reg = <0x400 0xd0080000 0x0 0x10000>;
p1_mbigen_sec_a: intc_sec {
msi-parent = <&p1_its_dsa_a 0x44400>;
interrupt-controller;
#interrupt-cells = <2>;
num-pins = <33>;
};
p1_mbigen_smmu_alg_a: intc_smmu_alg {
msi-parent = <&p1_its_dsa_a 0x44b1b>;
interrupt-controller;
#interrupt-cells = <2>;
num-pins = <3>;
};
};
p1_mbigen_alg_b:interrupt-controller@408,d0080000 {
compatible = "hisilicon,mbigen-v2";
reg = <0x408 0xd0080000 0x0 0x10000>;
p1_mbigen_sec_b: intc_sec {
msi-parent = <&p1_its_dsa_b 0x46400>;
interrupt-controller;
#interrupt-cells = <2>;
num-pins = <33>;
};
p1_mbigen_smmu_alg_b: intc_smmu_alg {
msi-parent = <&p1_its_dsa_b 0x46b1b>;
interrupt-controller;
#interrupt-cells = <2>;
num-pins = <3>;
};
};
p0_mbigen_dsa_a: interrupt-controller@c0080000 {
compatible = "hisilicon,mbigen-v2";
reg = <0x0 0xc0080000 0x0 0x10000>;
@@ -1107,6 +1174,58 @@
hisilicon,broken-prefetch-cmd;
status = "disabled";
};
p0_smmu_alg_a: smmu_alg@d0040000 {
compatible = "arm,smmu-v3";
reg = <0x0 0xd0040000 0x0 0x20000>;
interrupt-parent = <&p0_mbigen_smmu_alg_a>;
interrupts = <733 1>,
<734 1>,
<735 1>;
interrupt-names = "eventq", "gerror", "priq";
#iommu-cells = <1>;
dma-coherent;
hisilicon,broken-prefetch-cmd;
/* smmu-cb-memtype = <0x0 0x1>;*/
};
p0_smmu_alg_b: smmu_alg@8,d0040000 {
compatible = "arm,smmu-v3";
reg = <0x8 0xd0040000 0x0 0x20000>;
interrupt-parent = <&p0_mbigen_smmu_alg_b>;
interrupts = <733 1>,
<734 1>,
<735 1>;
interrupt-names = "eventq", "gerror", "priq";
#iommu-cells = <1>;
dma-coherent;
hisilicon,broken-prefetch-cmd;
/* smmu-cb-memtype = <0x0 0x1>;*/
};
p1_smmu_alg_a: smmu_alg@400,d0040000 {
compatible = "arm,smmu-v3";
reg = <0x400 0xd0040000 0x0 0x20000>;
interrupt-parent = <&p1_mbigen_smmu_alg_a>;
interrupts = <733 1>,
<734 1>,
<735 1>;
interrupt-names = "eventq", "gerror", "priq";
#iommu-cells = <1>;
dma-coherent;
hisilicon,broken-prefetch-cmd;
/* smmu-cb-memtype = <0x0 0x1>;*/
};
p1_smmu_alg_b: smmu_alg@408,d0040000 {
compatible = "arm,smmu-v3";
reg = <0x408 0xd0040000 0x0 0x20000>;
interrupt-parent = <&p1_mbigen_smmu_alg_b>;
interrupts = <733 1>,
<734 1>,
<735 1>;
interrupt-names = "eventq", "gerror", "priq";
#iommu-cells = <1>;
dma-coherent;
hisilicon,broken-prefetch-cmd;
/* smmu-cb-memtype = <0x0 0x1>;*/
};
soc {
compatible = "simple-bus";
@@ -1603,5 +1722,170 @@
0x0 0 0 4 &mbigen_pcie2_a 671 4>;
status = "disabled";
};
p0_sec_a: crypto@d2000000 {
compatible = "hisilicon,hip07-sec";
reg = <0x0 0xd0000000 0x0 0x10000
0x0 0xd2000000 0x0 0x10000
0x0 0xd2010000 0x0 0x10000
0x0 0xd2020000 0x0 0x10000
0x0 0xd2030000 0x0 0x10000
0x0 0xd2040000 0x0 0x10000
0x0 0xd2050000 0x0 0x10000
0x0 0xd2060000 0x0 0x10000
0x0 0xd2070000 0x0 0x10000
0x0 0xd2080000 0x0 0x10000
0x0 0xd2090000 0x0 0x10000
0x0 0xd20a0000 0x0 0x10000
0x0 0xd20b0000 0x0 0x10000
0x0 0xd20c0000 0x0 0x10000
0x0 0xd20d0000 0x0 0x10000
0x0 0xd20e0000 0x0 0x10000
0x0 0xd20f0000 0x0 0x10000
0x0 0xd2100000 0x0 0x10000>;
interrupt-parent = <&p0_mbigen_sec_a>;
iommus = <&p0_smmu_alg_a 0x600>;
dma-coherent;
interrupts = <576 4>,
<577 1>, <578 4>,
<579 1>, <580 4>,
<581 1>, <582 4>,
<583 1>, <584 4>,
<585 1>, <586 4>,
<587 1>, <588 4>,
<589 1>, <590 4>,
<591 1>, <592 4>,
<593 1>, <594 4>,
<595 1>, <596 4>,
<597 1>, <598 4>,
<599 1>, <600 4>,
<601 1>, <602 4>,
<603 1>, <604 4>,
<605 1>, <606 4>,
<607 1>, <608 4>;
};
p0_sec_b: crypto@8,d2000000 {
compatible = "hisilicon,hip07-sec";
reg = <0x8 0xd0000000 0x0 0x10000
0x8 0xd2000000 0x0 0x10000
0x8 0xd2010000 0x0 0x10000
0x8 0xd2020000 0x0 0x10000
0x8 0xd2030000 0x0 0x10000
0x8 0xd2040000 0x0 0x10000
0x8 0xd2050000 0x0 0x10000
0x8 0xd2060000 0x0 0x10000
0x8 0xd2070000 0x0 0x10000
0x8 0xd2080000 0x0 0x10000
0x8 0xd2090000 0x0 0x10000
0x8 0xd20a0000 0x0 0x10000
0x8 0xd20b0000 0x0 0x10000
0x8 0xd20c0000 0x0 0x10000
0x8 0xd20d0000 0x0 0x10000
0x8 0xd20e0000 0x0 0x10000
0x8 0xd20f0000 0x0 0x10000
0x8 0xd2100000 0x0 0x10000>;
interrupt-parent = <&p0_mbigen_sec_b>;
iommus = <&p0_smmu_alg_b 0x600>;
dma-coherent;
interrupts = <576 4>,
<577 1>, <578 4>,
<579 1>, <580 4>,
<581 1>, <582 4>,
<583 1>, <584 4>,
<585 1>, <586 4>,
<587 1>, <588 4>,
<589 1>, <590 4>,
<591 1>, <592 4>,
<593 1>, <594 4>,
<595 1>, <596 4>,
<597 1>, <598 4>,
<599 1>, <600 4>,
<601 1>, <602 4>,
<603 1>, <604 4>,
<605 1>, <606 4>,
<607 1>, <608 4>;
};
p1_sec_a: crypto@400,d2000000 {
compatible = "hisilicon,hip07-sec";
reg = <0x400 0xd0000000 0x0 0x10000
0x400 0xd2000000 0x0 0x10000
0x400 0xd2010000 0x0 0x10000
0x400 0xd2020000 0x0 0x10000
0x400 0xd2030000 0x0 0x10000
0x400 0xd2040000 0x0 0x10000
0x400 0xd2050000 0x0 0x10000
0x400 0xd2060000 0x0 0x10000
0x400 0xd2070000 0x0 0x10000
0x400 0xd2080000 0x0 0x10000
0x400 0xd2090000 0x0 0x10000
0x400 0xd20a0000 0x0 0x10000
0x400 0xd20b0000 0x0 0x10000
0x400 0xd20c0000 0x0 0x10000
0x400 0xd20d0000 0x0 0x10000
0x400 0xd20e0000 0x0 0x10000
0x400 0xd20f0000 0x0 0x10000
0x400 0xd2100000 0x0 0x10000>;
interrupt-parent = <&p1_mbigen_sec_a>;
iommus = <&p1_smmu_alg_a 0x600>;
dma-coherent;
interrupts = <576 4>,
<577 1>, <578 4>,
<579 1>, <580 4>,
<581 1>, <582 4>,
<583 1>, <584 4>,
<585 1>, <586 4>,
<587 1>, <588 4>,
<589 1>, <590 4>,
<591 1>, <592 4>,
<593 1>, <594 4>,
<595 1>, <596 4>,
<597 1>, <598 4>,
<599 1>, <600 4>,
<601 1>, <602 4>,
<603 1>, <604 4>,
<605 1>, <606 4>,
<607 1>, <608 4>;
};
p1_sec_b: crypto@408,d2000000 {
compatible = "hisilicon,hip07-sec";
reg = <0x408 0xd0000000 0x0 0x10000
0x408 0xd2000000 0x0 0x10000
0x408 0xd2010000 0x0 0x10000
0x408 0xd2020000 0x0 0x10000
0x408 0xd2030000 0x0 0x10000
0x408 0xd2040000 0x0 0x10000
0x408 0xd2050000 0x0 0x10000
0x408 0xd2060000 0x0 0x10000
0x408 0xd2070000 0x0 0x10000
0x408 0xd2080000 0x0 0x10000
0x408 0xd2090000 0x0 0x10000
0x408 0xd20a0000 0x0 0x10000
0x408 0xd20b0000 0x0 0x10000
0x408 0xd20c0000 0x0 0x10000
0x408 0xd20d0000 0x0 0x10000
0x408 0xd20e0000 0x0 0x10000
0x408 0xd20f0000 0x0 0x10000
0x408 0xd2100000 0x0 0x10000>;
interrupt-parent = <&p1_mbigen_sec_b>;
iommus = <&p1_smmu_alg_b 0x600>;
dma-coherent;
interrupts = <576 4>,
<577 1>, <578 4>,
<579 1>, <580 4>,
<581 1>, <582 4>,
<583 1>, <584 4>,
<585 1>, <586 4>,
<587 1>, <588 4>,
<589 1>, <590 4>,
<591 1>, <592 4>,
<593 1>, <594 4>,
<595 1>, <596 4>,
<597 1>, <598 4>,
<599 1>, <600 4>,
<601 1>, <602 4>,
<603 1>, <604 4>,
<605 1>, <606 4>,
<607 1>, <608 4>;
};
};
};
-3
View File
@@ -567,7 +567,6 @@ static struct shash_alg mac_algs[] = { {
.base.cra_name = "cmac(aes)",
.base.cra_driver_name = "cmac-aes-" MODE,
.base.cra_priority = PRIO,
.base.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.base.cra_blocksize = AES_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct mac_tfm_ctx) +
2 * AES_BLOCK_SIZE,
@@ -583,7 +582,6 @@ static struct shash_alg mac_algs[] = { {
.base.cra_name = "xcbc(aes)",
.base.cra_driver_name = "xcbc-aes-" MODE,
.base.cra_priority = PRIO,
.base.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.base.cra_blocksize = AES_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct mac_tfm_ctx) +
2 * AES_BLOCK_SIZE,
@@ -599,7 +597,6 @@ static struct shash_alg mac_algs[] = { {
.base.cra_name = "cbcmac(aes)",
.base.cra_driver_name = "cbcmac-aes-" MODE,
.base.cra_priority = PRIO,
.base.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.base.cra_blocksize = 1,
.base.cra_ctxsize = sizeof(struct mac_tfm_ctx),
.base.cra_module = THIS_MODULE,
+192 -79
View File
@@ -1,7 +1,7 @@
/*
* Accelerated GHASH implementation with ARMv8 PMULL instructions.
*
* Copyright (C) 2014 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
* Copyright (C) 2014 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published
@@ -46,6 +46,19 @@
ss3 .req v26
ss4 .req v27
XL2 .req v8
XM2 .req v9
XH2 .req v10
XL3 .req v11
XM3 .req v12
XH3 .req v13
TT3 .req v14
TT4 .req v15
HH .req v16
HH3 .req v17
HH4 .req v18
HH34 .req v19
.text
.arch armv8-a+crypto
@@ -134,11 +147,25 @@
.endm
.macro __pmull_pre_p64
add x8, x3, #16
ld1 {HH.2d-HH4.2d}, [x8]
trn1 SHASH2.2d, SHASH.2d, HH.2d
trn2 T1.2d, SHASH.2d, HH.2d
eor SHASH2.16b, SHASH2.16b, T1.16b
trn1 HH34.2d, HH3.2d, HH4.2d
trn2 T1.2d, HH3.2d, HH4.2d
eor HH34.16b, HH34.16b, T1.16b
movi MASK.16b, #0xe1
shl MASK.2d, MASK.2d, #57
.endm
.macro __pmull_pre_p8
ext SHASH2.16b, SHASH.16b, SHASH.16b, #8
eor SHASH2.16b, SHASH2.16b, SHASH.16b
// k00_16 := 0x0000000000000000_000000000000ffff
// k32_48 := 0x00000000ffffffff_0000ffffffffffff
movi k32_48.2d, #0xffffffff
@@ -213,31 +240,88 @@
.endm
.macro __pmull_ghash, pn
frame_push 5
mov x19, x0
mov x20, x1
mov x21, x2
mov x22, x3
mov x23, x4
0: ld1 {SHASH.2d}, [x22]
ld1 {XL.2d}, [x20]
ext SHASH2.16b, SHASH.16b, SHASH.16b, #8
eor SHASH2.16b, SHASH2.16b, SHASH.16b
ld1 {SHASH.2d}, [x3]
ld1 {XL.2d}, [x1]
__pmull_pre_\pn
/* do the head block first, if supplied */
cbz x23, 1f
ld1 {T1.2d}, [x23]
mov x23, xzr
b 2f
cbz x4, 0f
ld1 {T1.2d}, [x4]
mov x4, xzr
b 3f
1: ld1 {T1.2d}, [x21], #16
sub w19, w19, #1
0: .ifc \pn, p64
tbnz w0, #0, 2f // skip until #blocks is a
tbnz w0, #1, 2f // round multiple of 4
2: /* multiply XL by SHASH in GF(2^128) */
1: ld1 {XM3.16b-TT4.16b}, [x2], #64
sub w0, w0, #4
rev64 T1.16b, XM3.16b
rev64 T2.16b, XH3.16b
rev64 TT4.16b, TT4.16b
rev64 TT3.16b, TT3.16b
ext IN1.16b, TT4.16b, TT4.16b, #8
ext XL3.16b, TT3.16b, TT3.16b, #8
eor TT4.16b, TT4.16b, IN1.16b
pmull2 XH2.1q, SHASH.2d, IN1.2d // a1 * b1
pmull XL2.1q, SHASH.1d, IN1.1d // a0 * b0
pmull XM2.1q, SHASH2.1d, TT4.1d // (a1 + a0)(b1 + b0)
eor TT3.16b, TT3.16b, XL3.16b
pmull2 XH3.1q, HH.2d, XL3.2d // a1 * b1
pmull XL3.1q, HH.1d, XL3.1d // a0 * b0
pmull2 XM3.1q, SHASH2.2d, TT3.2d // (a1 + a0)(b1 + b0)
ext IN1.16b, T2.16b, T2.16b, #8
eor XL2.16b, XL2.16b, XL3.16b
eor XH2.16b, XH2.16b, XH3.16b
eor XM2.16b, XM2.16b, XM3.16b
eor T2.16b, T2.16b, IN1.16b
pmull2 XH3.1q, HH3.2d, IN1.2d // a1 * b1
pmull XL3.1q, HH3.1d, IN1.1d // a0 * b0
pmull XM3.1q, HH34.1d, T2.1d // (a1 + a0)(b1 + b0)
eor XL2.16b, XL2.16b, XL3.16b
eor XH2.16b, XH2.16b, XH3.16b
eor XM2.16b, XM2.16b, XM3.16b
ext IN1.16b, T1.16b, T1.16b, #8
ext TT3.16b, XL.16b, XL.16b, #8
eor XL.16b, XL.16b, IN1.16b
eor T1.16b, T1.16b, TT3.16b
pmull2 XH.1q, HH4.2d, XL.2d // a1 * b1
eor T1.16b, T1.16b, XL.16b
pmull XL.1q, HH4.1d, XL.1d // a0 * b0
pmull2 XM.1q, HH34.2d, T1.2d // (a1 + a0)(b1 + b0)
eor XL.16b, XL.16b, XL2.16b
eor XH.16b, XH.16b, XH2.16b
eor XM.16b, XM.16b, XM2.16b
eor T2.16b, XL.16b, XH.16b
ext T1.16b, XL.16b, XH.16b, #8
eor XM.16b, XM.16b, T2.16b
__pmull_reduce_p64
eor T2.16b, T2.16b, XH.16b
eor XL.16b, XL.16b, T2.16b
cbz w0, 5f
b 1b
.endif
2: ld1 {T1.2d}, [x2], #16
sub w0, w0, #1
3: /* multiply XL by SHASH in GF(2^128) */
CPU_LE( rev64 T1.16b, T1.16b )
ext T2.16b, XL.16b, XL.16b, #8
@@ -250,7 +334,7 @@ CPU_LE( rev64 T1.16b, T1.16b )
__pmull_\pn XL, XL, SHASH // a0 * b0
__pmull_\pn XM, T1, SHASH2 // (a1 + a0)(b1 + b0)
eor T2.16b, XL.16b, XH.16b
4: eor T2.16b, XL.16b, XH.16b
ext T1.16b, XL.16b, XH.16b, #8
eor XM.16b, XM.16b, T2.16b
@@ -259,18 +343,9 @@ CPU_LE( rev64 T1.16b, T1.16b )
eor T2.16b, T2.16b, XH.16b
eor XL.16b, XL.16b, T2.16b
cbz w19, 3f
cbnz w0, 0b
if_will_cond_yield_neon
st1 {XL.2d}, [x20]
do_cond_yield_neon
b 0b
endif_yield_neon
b 1b
3: st1 {XL.2d}, [x20]
frame_pop
5: st1 {XL.2d}, [x1]
ret
.endm
@@ -286,9 +361,10 @@ ENTRY(pmull_ghash_update_p8)
__pmull_ghash p8
ENDPROC(pmull_ghash_update_p8)
KS .req v8
CTR .req v9
INP .req v10
KS0 .req v12
KS1 .req v13
INP0 .req v14
INP1 .req v15
.macro load_round_keys, rounds, rk
cmp \rounds, #12
@@ -322,98 +398,128 @@ ENDPROC(pmull_ghash_update_p8)
.endm
.macro pmull_gcm_do_crypt, enc
ld1 {SHASH.2d}, [x4]
ld1 {SHASH.2d}, [x4], #16
ld1 {HH.2d}, [x4]
ld1 {XL.2d}, [x1]
ldr x8, [x5, #8] // load lower counter
load_round_keys w7, x6
movi MASK.16b, #0xe1
ext SHASH2.16b, SHASH.16b, SHASH.16b, #8
trn1 SHASH2.2d, SHASH.2d, HH.2d
trn2 T1.2d, SHASH.2d, HH.2d
CPU_LE( rev x8, x8 )
shl MASK.2d, MASK.2d, #57
eor SHASH2.16b, SHASH2.16b, SHASH.16b
eor SHASH2.16b, SHASH2.16b, T1.16b
.if \enc == 1
ldr x10, [sp]
ld1 {KS.16b}, [x10]
ld1 {KS0.16b-KS1.16b}, [x10]
.endif
0: ld1 {CTR.8b}, [x5] // load upper counter
ld1 {INP.16b}, [x3], #16
cbnz x6, 4f
0: ld1 {INP0.16b-INP1.16b}, [x3], #32
rev x9, x8
add x8, x8, #1
sub w0, w0, #1
ins CTR.d[1], x9 // set lower counter
add x11, x8, #1
add x8, x8, #2
.if \enc == 1
eor INP.16b, INP.16b, KS.16b // encrypt input
st1 {INP.16b}, [x2], #16
eor INP0.16b, INP0.16b, KS0.16b // encrypt input
eor INP1.16b, INP1.16b, KS1.16b
.endif
rev64 T1.16b, INP.16b
ld1 {KS0.8b}, [x5] // load upper counter
rev x11, x11
sub w0, w0, #2
mov KS1.8b, KS0.8b
ins KS0.d[1], x9 // set lower counter
ins KS1.d[1], x11
rev64 T1.16b, INP1.16b
cmp w7, #12
b.ge 2f // AES-192/256?
1: enc_round CTR, v21
ext T2.16b, XL.16b, XL.16b, #8
1: enc_round KS0, v21
ext IN1.16b, T1.16b, T1.16b, #8
enc_round CTR, v22
enc_round KS1, v21
pmull2 XH2.1q, SHASH.2d, IN1.2d // a1 * b1
enc_round KS0, v22
eor T1.16b, T1.16b, IN1.16b
enc_round KS1, v22
pmull XL2.1q, SHASH.1d, IN1.1d // a0 * b0
enc_round KS0, v23
pmull XM2.1q, SHASH2.1d, T1.1d // (a1 + a0)(b1 + b0)
enc_round KS1, v23
rev64 T1.16b, INP0.16b
ext T2.16b, XL.16b, XL.16b, #8
enc_round KS0, v24
ext IN1.16b, T1.16b, T1.16b, #8
eor T1.16b, T1.16b, T2.16b
enc_round KS1, v24
eor XL.16b, XL.16b, IN1.16b
enc_round CTR, v23
pmull2 XH.1q, SHASH.2d, XL.2d // a1 * b1
enc_round KS0, v25
eor T1.16b, T1.16b, XL.16b
enc_round CTR, v24
enc_round KS1, v25
pmull2 XH.1q, HH.2d, XL.2d // a1 * b1
pmull XL.1q, SHASH.1d, XL.1d // a0 * b0
pmull XM.1q, SHASH2.1d, T1.1d // (a1 + a0)(b1 + b0)
enc_round KS0, v26
pmull XL.1q, HH.1d, XL.1d // a0 * b0
enc_round CTR, v25
enc_round KS1, v26
pmull2 XM.1q, SHASH2.2d, T1.2d // (a1 + a0)(b1 + b0)
enc_round KS0, v27
eor XL.16b, XL.16b, XL2.16b
eor XH.16b, XH.16b, XH2.16b
enc_round KS1, v27
eor XM.16b, XM.16b, XM2.16b
ext T1.16b, XL.16b, XH.16b, #8
enc_round KS0, v28
eor T2.16b, XL.16b, XH.16b
eor XM.16b, XM.16b, T1.16b
enc_round CTR, v26
enc_round KS1, v28
eor XM.16b, XM.16b, T2.16b
enc_round KS0, v29
pmull T2.1q, XL.1d, MASK.1d
enc_round CTR, v27
enc_round KS1, v29
mov XH.d[0], XM.d[1]
mov XM.d[1], XL.d[0]
enc_round CTR, v28
aese KS0.16b, v30.16b
eor XL.16b, XM.16b, T2.16b
enc_round CTR, v29
aese KS1.16b, v30.16b
ext T2.16b, XL.16b, XL.16b, #8
aese CTR.16b, v30.16b
eor KS0.16b, KS0.16b, v31.16b
pmull XL.1q, XL.1d, MASK.1d
eor T2.16b, T2.16b, XH.16b
eor KS.16b, CTR.16b, v31.16b
eor KS1.16b, KS1.16b, v31.16b
eor XL.16b, XL.16b, T2.16b
.if \enc == 0
eor INP.16b, INP.16b, KS.16b
st1 {INP.16b}, [x2], #16
eor INP0.16b, INP0.16b, KS0.16b
eor INP1.16b, INP1.16b, KS1.16b
.endif
st1 {INP0.16b-INP1.16b}, [x2], #32
cbnz w0, 0b
CPU_LE( rev x8, x8 )
@@ -421,17 +527,24 @@ CPU_LE( rev x8, x8 )
str x8, [x5, #8] // store lower counter
.if \enc == 1
st1 {KS.16b}, [x10]
st1 {KS0.16b-KS1.16b}, [x10]
.endif
ret
2: b.eq 3f // AES-192?
enc_round CTR, v17
enc_round CTR, v18
3: enc_round CTR, v19
enc_round CTR, v20
enc_round KS0, v17
enc_round KS1, v17
enc_round KS0, v18
enc_round KS1, v18
3: enc_round KS0, v19
enc_round KS1, v19
enc_round KS0, v20
enc_round KS1, v20
b 1b
4: load_round_keys w7, x6
b 0b
.endm
/*
+129 -77
View File
@@ -1,7 +1,7 @@
/*
* Accelerated GHASH implementation with ARMv8 PMULL instructions.
*
* Copyright (C) 2014 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
* Copyright (C) 2014 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published
@@ -33,9 +33,12 @@ MODULE_ALIAS_CRYPTO("ghash");
#define GCM_IV_SIZE 12
struct ghash_key {
u64 a;
u64 b;
be128 k;
u64 h[2];
u64 h2[2];
u64 h3[2];
u64 h4[2];
be128 k;
};
struct ghash_desc_ctx {
@@ -113,6 +116,9 @@ static void ghash_do_update(int blocks, u64 dg[], const char *src,
}
}
/* avoid hogging the CPU for too long */
#define MAX_BLOCKS (SZ_64K / GHASH_BLOCK_SIZE)
static int ghash_update(struct shash_desc *desc, const u8 *src,
unsigned int len)
{
@@ -136,11 +142,16 @@ static int ghash_update(struct shash_desc *desc, const u8 *src,
blocks = len / GHASH_BLOCK_SIZE;
len %= GHASH_BLOCK_SIZE;
ghash_do_update(blocks, ctx->digest, src, key,
partial ? ctx->buf : NULL);
do {
int chunk = min(blocks, MAX_BLOCKS);
src += blocks * GHASH_BLOCK_SIZE;
partial = 0;
ghash_do_update(chunk, ctx->digest, src, key,
partial ? ctx->buf : NULL);
blocks -= chunk;
src += chunk * GHASH_BLOCK_SIZE;
partial = 0;
} while (unlikely(blocks > 0));
}
if (len)
memcpy(ctx->buf + partial, src, len);
@@ -166,23 +177,36 @@ static int ghash_final(struct shash_desc *desc, u8 *dst)
return 0;
}
static void ghash_reflect(u64 h[], const be128 *k)
{
u64 carry = be64_to_cpu(k->a) & BIT(63) ? 1 : 0;
h[0] = (be64_to_cpu(k->b) << 1) | carry;
h[1] = (be64_to_cpu(k->a) << 1) | (be64_to_cpu(k->b) >> 63);
if (carry)
h[1] ^= 0xc200000000000000UL;
}
static int __ghash_setkey(struct ghash_key *key,
const u8 *inkey, unsigned int keylen)
{
u64 a, b;
be128 h;
/* needed for the fallback */
memcpy(&key->k, inkey, GHASH_BLOCK_SIZE);
/* perform multiplication by 'x' in GF(2^128) */
b = get_unaligned_be64(inkey);
a = get_unaligned_be64(inkey + 8);
ghash_reflect(key->h, &key->k);
key->a = (a << 1) | (b >> 63);
key->b = (b << 1) | (a >> 63);
h = key->k;
gf128mul_lle(&h, &key->k);
ghash_reflect(key->h2, &h);
if (b >> 63)
key->b ^= 0xc200000000000000UL;
gf128mul_lle(&h, &key->k);
ghash_reflect(key->h3, &h);
gf128mul_lle(&h, &key->k);
ghash_reflect(key->h4, &h);
return 0;
}
@@ -204,7 +228,6 @@ static struct shash_alg ghash_alg = {
.base.cra_name = "ghash",
.base.cra_driver_name = "ghash-ce",
.base.cra_priority = 200,
.base.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.base.cra_blocksize = GHASH_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct ghash_key),
.base.cra_module = THIS_MODULE,
@@ -245,7 +268,7 @@ static int gcm_setkey(struct crypto_aead *tfm, const u8 *inkey,
__aes_arm64_encrypt(ctx->aes_key.key_enc, key, (u8[AES_BLOCK_SIZE]){},
num_rounds(&ctx->aes_key));
return __ghash_setkey(&ctx->ghash_key, key, sizeof(key));
return __ghash_setkey(&ctx->ghash_key, key, sizeof(be128));
}
static int gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
@@ -349,9 +372,10 @@ static int gcm_encrypt(struct aead_request *req)
struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
struct skcipher_walk walk;
u8 iv[AES_BLOCK_SIZE];
u8 ks[AES_BLOCK_SIZE];
u8 ks[2 * AES_BLOCK_SIZE];
u8 tag[AES_BLOCK_SIZE];
u64 dg[2] = {};
int nrounds = num_rounds(&ctx->aes_key);
int err;
if (req->assoclen)
@@ -360,38 +384,38 @@ static int gcm_encrypt(struct aead_request *req)
memcpy(iv, req->iv, GCM_IV_SIZE);
put_unaligned_be32(1, iv + GCM_IV_SIZE);
if (likely(may_use_simd())) {
err = skcipher_walk_aead_encrypt(&walk, req, false);
if (likely(may_use_simd() && walk.total >= 2 * AES_BLOCK_SIZE)) {
u32 const *rk = NULL;
kernel_neon_begin();
pmull_gcm_encrypt_block(tag, iv, ctx->aes_key.key_enc,
num_rounds(&ctx->aes_key));
pmull_gcm_encrypt_block(tag, iv, ctx->aes_key.key_enc, nrounds);
put_unaligned_be32(2, iv + GCM_IV_SIZE);
pmull_gcm_encrypt_block(ks, iv, NULL,
num_rounds(&ctx->aes_key));
pmull_gcm_encrypt_block(ks, iv, NULL, nrounds);
put_unaligned_be32(3, iv + GCM_IV_SIZE);
kernel_neon_end();
pmull_gcm_encrypt_block(ks + AES_BLOCK_SIZE, iv, NULL, nrounds);
put_unaligned_be32(4, iv + GCM_IV_SIZE);
err = skcipher_walk_aead_encrypt(&walk, req, false);
do {
int blocks = walk.nbytes / (2 * AES_BLOCK_SIZE) * 2;
while (walk.nbytes >= AES_BLOCK_SIZE) {
int blocks = walk.nbytes / AES_BLOCK_SIZE;
if (rk)
kernel_neon_begin();
kernel_neon_begin();
pmull_gcm_encrypt(blocks, dg, walk.dst.virt.addr,
walk.src.virt.addr, &ctx->ghash_key,
iv, ctx->aes_key.key_enc,
num_rounds(&ctx->aes_key), ks);
iv, rk, nrounds, ks);
kernel_neon_end();
err = skcipher_walk_done(&walk,
walk.nbytes % AES_BLOCK_SIZE);
}
} else {
__aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv,
num_rounds(&ctx->aes_key));
put_unaligned_be32(2, iv + GCM_IV_SIZE);
walk.nbytes % (2 * AES_BLOCK_SIZE));
err = skcipher_walk_aead_encrypt(&walk, req, false);
rk = ctx->aes_key.key_enc;
} while (walk.nbytes >= 2 * AES_BLOCK_SIZE);
} else {
__aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv, nrounds);
put_unaligned_be32(2, iv + GCM_IV_SIZE);
while (walk.nbytes >= AES_BLOCK_SIZE) {
int blocks = walk.nbytes / AES_BLOCK_SIZE;
@@ -400,8 +424,7 @@ static int gcm_encrypt(struct aead_request *req)
do {
__aes_arm64_encrypt(ctx->aes_key.key_enc,
ks, iv,
num_rounds(&ctx->aes_key));
ks, iv, nrounds);
crypto_xor_cpy(dst, src, ks, AES_BLOCK_SIZE);
crypto_inc(iv, AES_BLOCK_SIZE);
@@ -418,19 +441,28 @@ static int gcm_encrypt(struct aead_request *req)
}
if (walk.nbytes)
__aes_arm64_encrypt(ctx->aes_key.key_enc, ks, iv,
num_rounds(&ctx->aes_key));
nrounds);
}
/* handle the tail */
if (walk.nbytes) {
u8 buf[GHASH_BLOCK_SIZE];
unsigned int nbytes = walk.nbytes;
u8 *dst = walk.dst.virt.addr;
u8 *head = NULL;
crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr, ks,
walk.nbytes);
memcpy(buf, walk.dst.virt.addr, walk.nbytes);
memset(buf + walk.nbytes, 0, GHASH_BLOCK_SIZE - walk.nbytes);
ghash_do_update(1, dg, buf, &ctx->ghash_key, NULL);
if (walk.nbytes > GHASH_BLOCK_SIZE) {
head = dst;
dst += GHASH_BLOCK_SIZE;
nbytes %= GHASH_BLOCK_SIZE;
}
memcpy(buf, dst, nbytes);
memset(buf + nbytes, 0, GHASH_BLOCK_SIZE - nbytes);
ghash_do_update(!!nbytes, dg, buf, &ctx->ghash_key, head);
err = skcipher_walk_done(&walk, 0);
}
@@ -453,10 +485,11 @@ static int gcm_decrypt(struct aead_request *req)
struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
unsigned int authsize = crypto_aead_authsize(aead);
struct skcipher_walk walk;
u8 iv[AES_BLOCK_SIZE];
u8 iv[2 * AES_BLOCK_SIZE];
u8 tag[AES_BLOCK_SIZE];
u8 buf[GHASH_BLOCK_SIZE];
u8 buf[2 * GHASH_BLOCK_SIZE];
u64 dg[2] = {};
int nrounds = num_rounds(&ctx->aes_key);
int err;
if (req->assoclen)
@@ -465,43 +498,53 @@ static int gcm_decrypt(struct aead_request *req)
memcpy(iv, req->iv, GCM_IV_SIZE);
put_unaligned_be32(1, iv + GCM_IV_SIZE);
if (likely(may_use_simd())) {
err = skcipher_walk_aead_decrypt(&walk, req, false);
if (likely(may_use_simd() && walk.total >= 2 * AES_BLOCK_SIZE)) {
u32 const *rk = NULL;
kernel_neon_begin();
pmull_gcm_encrypt_block(tag, iv, ctx->aes_key.key_enc,
num_rounds(&ctx->aes_key));
pmull_gcm_encrypt_block(tag, iv, ctx->aes_key.key_enc, nrounds);
put_unaligned_be32(2, iv + GCM_IV_SIZE);
kernel_neon_end();
err = skcipher_walk_aead_decrypt(&walk, req, false);
do {
int blocks = walk.nbytes / (2 * AES_BLOCK_SIZE) * 2;
int rem = walk.total - blocks * AES_BLOCK_SIZE;
while (walk.nbytes >= AES_BLOCK_SIZE) {
int blocks = walk.nbytes / AES_BLOCK_SIZE;
if (rk)
kernel_neon_begin();
kernel_neon_begin();
pmull_gcm_decrypt(blocks, dg, walk.dst.virt.addr,
walk.src.virt.addr, &ctx->ghash_key,
iv, ctx->aes_key.key_enc,
num_rounds(&ctx->aes_key));
iv, rk, nrounds);
/* check if this is the final iteration of the loop */
if (rem < (2 * AES_BLOCK_SIZE)) {
u8 *iv2 = iv + AES_BLOCK_SIZE;
if (rem > AES_BLOCK_SIZE) {
memcpy(iv2, iv, AES_BLOCK_SIZE);
crypto_inc(iv2, AES_BLOCK_SIZE);
}
pmull_gcm_encrypt_block(iv, iv, NULL, nrounds);
if (rem > AES_BLOCK_SIZE)
pmull_gcm_encrypt_block(iv2, iv2, NULL,
nrounds);
}
kernel_neon_end();
err = skcipher_walk_done(&walk,
walk.nbytes % AES_BLOCK_SIZE);
}
if (walk.nbytes) {
kernel_neon_begin();
pmull_gcm_encrypt_block(iv, iv, ctx->aes_key.key_enc,
num_rounds(&ctx->aes_key));
kernel_neon_end();
}
walk.nbytes % (2 * AES_BLOCK_SIZE));
rk = ctx->aes_key.key_enc;
} while (walk.nbytes >= 2 * AES_BLOCK_SIZE);
} else {
__aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv,
num_rounds(&ctx->aes_key));
__aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv, nrounds);
put_unaligned_be32(2, iv + GCM_IV_SIZE);
err = skcipher_walk_aead_decrypt(&walk, req, false);
while (walk.nbytes >= AES_BLOCK_SIZE) {
int blocks = walk.nbytes / AES_BLOCK_SIZE;
u8 *dst = walk.dst.virt.addr;
@@ -512,8 +555,7 @@ static int gcm_decrypt(struct aead_request *req)
do {
__aes_arm64_encrypt(ctx->aes_key.key_enc,
buf, iv,
num_rounds(&ctx->aes_key));
buf, iv, nrounds);
crypto_xor_cpy(dst, src, buf, AES_BLOCK_SIZE);
crypto_inc(iv, AES_BLOCK_SIZE);
@@ -526,14 +568,24 @@ static int gcm_decrypt(struct aead_request *req)
}
if (walk.nbytes)
__aes_arm64_encrypt(ctx->aes_key.key_enc, iv, iv,
num_rounds(&ctx->aes_key));
nrounds);
}
/* handle the tail */
if (walk.nbytes) {
memcpy(buf, walk.src.virt.addr, walk.nbytes);
memset(buf + walk.nbytes, 0, GHASH_BLOCK_SIZE - walk.nbytes);
ghash_do_update(1, dg, buf, &ctx->ghash_key, NULL);
const u8 *src = walk.src.virt.addr;
const u8 *head = NULL;
unsigned int nbytes = walk.nbytes;
if (walk.nbytes > GHASH_BLOCK_SIZE) {
head = src;
src += GHASH_BLOCK_SIZE;
nbytes %= GHASH_BLOCK_SIZE;
}
memcpy(buf, src, nbytes);
memset(buf + nbytes, 0, GHASH_BLOCK_SIZE - nbytes);
ghash_do_update(!!nbytes, dg, buf, &ctx->ghash_key, head);
crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr, iv,
walk.nbytes);
@@ -558,7 +610,7 @@ static int gcm_decrypt(struct aead_request *req)
static struct aead_alg gcm_aes_alg = {
.ivsize = GCM_IV_SIZE,
.chunksize = AES_BLOCK_SIZE,
.chunksize = 2 * AES_BLOCK_SIZE,
.maxauthsize = AES_BLOCK_SIZE,
.setkey = gcm_setkey,
.setauthsize = gcm_setauthsize,
-1
View File
@@ -99,7 +99,6 @@ static struct shash_alg alg = {
.cra_name = "sha1",
.cra_driver_name = "sha1-ce",
.cra_priority = 200,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA1_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}

Some files were not shown because too many files have changed in this diff Show More