mirror of
https://gitlab.winehq.org/wine/wine-gecko.git
synced 2024-09-13 09:24:08 -07:00
c69ac544dc
Bug 661609, Bug 650276 (code not used by Mozilla), Bug 602509, Bug 655411, Bug 655850, Bug 671711, Bug 617565, Bug 668001, Bug 346583, Bug 661061.
1668 lines
54 KiB
ArmAsm
1668 lines
54 KiB
ArmAsm
/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is Network Security Services.
 *
 * The Initial Developer of the Original Code is
 * Red Hat Inc.
 * Portions created by the Initial Developer are Copyright (C) 2009
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *   Ulrich Drepper <drepper@redhat.com>
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */
|
|
|
	.text

/* Byte offsets of the IV and of the expanded key schedule inside the
   AESContext structure (first argument of the ECB/CBC routines below).
   NOTE(review): the routines below hard-code 16 and 48 instead of using
   these macros (the macro use is kept in // comments) -- keep in sync. */
#define IV_OFFSET 16
#define EXPANDED_KEY_OFFSET 48
|
|
|
|
|
|
/* intel_aes_encrypt_init_128 -- build the AES-128 encryption key schedule.
   in %rdi : the key (16 bytes)
   in %rsi : buffer for expanded key (11 round keys = 176 bytes)
   Clobbers %eax, %rsi, %xmm1-%xmm3 (all caller-saved under SysV AMD64).
   The AES-NI instructions are hand-encoded with .byte so the file still
   assembles with toolchains that predate AES-NI; the intended mnemonic
   is given in the comment after each encoding. */
	.type intel_aes_encrypt_init_128,@function
	.globl intel_aes_encrypt_init_128
	.align 16
intel_aes_encrypt_init_128:
	movups (%rdi), %xmm1                /* round key 0 = the raw key */
	movups %xmm1, (%rsi)
	leaq 16(%rsi), %rsi                 /* %rsi -> slot for next round key */
	xorl %eax, %eax                     /* zero word used by key_expansion128 */

	/* one aeskeygenassist + expansion step per round constant 0x01..0x36 */
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x01 /* aeskeygenassist $0x01, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x02 /* aeskeygenassist $0x02, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x04 /* aeskeygenassist $0x04, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x08 /* aeskeygenassist $0x08, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x10 /* aeskeygenassist $0x10, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x20 /* aeskeygenassist $0x20, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x40 /* aeskeygenassist $0x40, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x80 /* aeskeygenassist $0x80, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x1b /* aeskeygenassist $0x1b, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x36 /* aeskeygenassist $0x36, %xmm1, %xmm2 */
	call key_expansion128

	ret
	.size intel_aes_encrypt_init_128, .-intel_aes_encrypt_init_128
|
|
|
|
|
|
/* intel_aes_decrypt_init_128 -- build the AES-128 decryption key schedule.
   in %rdi : the key (16 bytes)
   in %rsi : buffer for expanded key (11 round keys = 176 bytes)
   Clobbers %eax, %rsi, %xmm1-%xmm3.
   Same expansion as the encrypt variant, but each intermediate round key
   (rounds 1..9) is run through AESIMC (InvMixColumns) and stored back over
   the slot just written by key_expansion128 (-16(%rsi) after the call
   advanced %rsi).  Round keys 0 and 10 are deliberately left untouched,
   as the Equivalent Inverse Cipher requires. */
	.type intel_aes_decrypt_init_128,@function
	.globl intel_aes_decrypt_init_128
	.align 16
intel_aes_decrypt_init_128:
	movups (%rdi), %xmm1                /* round key 0 = the raw key */
	movups %xmm1, (%rsi)
	leaq 16(%rsi), %rsi
	xorl %eax, %eax                     /* zero word used by key_expansion128 */

	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x01 /* aeskeygenassist $0x01, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x38,0xdb,0xd1      /* aesimc %xmm1, %xmm2 */
	movups %xmm2, -16(%rsi)             /* overwrite round key 1 with InvMixColumns form */
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x02 /* aeskeygenassist $0x02, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x38,0xdb,0xd1      /* aesimc %xmm1, %xmm2 */
	movups %xmm2, -16(%rsi)
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x04 /* aeskeygenassist $0x04, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x38,0xdb,0xd1      /* aesimc %xmm1, %xmm2 */
	movups %xmm2, -16(%rsi)
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x08 /* aeskeygenassist $0x08, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x38,0xdb,0xd1      /* aesimc %xmm1, %xmm2 */
	movups %xmm2, -16(%rsi)
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x10 /* aeskeygenassist $0x10, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x38,0xdb,0xd1      /* aesimc %xmm1, %xmm2 */
	movups %xmm2, -16(%rsi)
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x20 /* aeskeygenassist $0x20, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x38,0xdb,0xd1      /* aesimc %xmm1, %xmm2 */
	movups %xmm2, -16(%rsi)
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x40 /* aeskeygenassist $0x40, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x38,0xdb,0xd1      /* aesimc %xmm1, %xmm2 */
	movups %xmm2, -16(%rsi)
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x80 /* aeskeygenassist $0x80, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x38,0xdb,0xd1      /* aesimc %xmm1, %xmm2 */
	movups %xmm2, -16(%rsi)
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x1b /* aeskeygenassist $0x1b, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x38,0xdb,0xd1      /* aesimc %xmm1, %xmm2 */
	movups %xmm2, -16(%rsi)
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x36 /* aeskeygenassist $0x36, %xmm1, %xmm2 */
	call key_expansion128               /* round key 10: no aesimc */

	ret
	.size intel_aes_decrypt_init_128, .-intel_aes_decrypt_init_128
|
|
|
|
|
|
/* key_expansion128 -- one AES-128 key-schedule step (internal helper).
   In:  %xmm1 = previous round key
        %xmm2 = aeskeygenassist result for this round
        %eax  = 0 (caller invariant, used to build a zero lane)
        %rsi  = destination slot for the new round key
   Out: %xmm1 = new round key, stored at (%rsi); %rsi advanced by 16.
   Clobbers %xmm3.  The two shufps steps fold the previous key's words
   into %xmm1 so every word of the new key gets the cumulative XOR of
   the preceding words, then the RotWord/SubWord/Rcon value broadcast
   from %xmm2 (pshufd $0xff) is XORed in. */
	.type key_expansion128,@function
	.align 16
key_expansion128:
	movd %eax, %xmm3                    /* %xmm3 = {0,?,?,?} (low word zero) */
	pshufd $0xff, %xmm2, %xmm2          /* broadcast keygenassist word 3 */
	shufps $0x10, %xmm1, %xmm3          /* slide key words left by one lane */
	pxor %xmm3, %xmm1
	shufps $0x8c, %xmm1, %xmm3          /* slide again: cumulative word XORs */
	pxor %xmm2, %xmm1                   /* mix in Rcon/SubWord value */
	pxor %xmm3, %xmm1
	movdqu %xmm1, (%rsi)                /* emit new round key */
	addq $16, %rsi
	ret
	.size key_expansion128, .-key_expansion128
|
|
|
|
|
|
/* intel_aes_encrypt_ecb_128 -- AES-128/ECB encrypt inputLen bytes.
   in %rdi : cx - context (expanded key at byte offset 48)
   in %rsi : output - pointer to output buffer
   in %rdx : outputLen - pointer to variable for length of output
             (filled by caller)
   in %rcx : maxOutputLen - length of output buffer
   in %r8  : input - pointer to input buffer
   in %r9  : inputLen - length of input buffer
   on stack: blocksize - AES blocksize (always 16, unused)
   Returns 0 in %eax.  %rdx and %rcx are never read here -- presumably the
   caller validated the lengths (TODO confirm against the C wrapper).
   Strategy: main loop handles 8 blocks per iteration (keys fetched per
   round via %r10), then a 1-block tail loop with round keys 1..9 held in
   %xmm3-%xmm11.  %rax counts bytes processed.  Assumes inputLen is a
   multiple of 16.  All loads/stores are unaligned (movdqu). */
	.type intel_aes_encrypt_ecb_128,@function
	.globl intel_aes_encrypt_ecb_128
	.align 16
intel_aes_encrypt_ecb_128:
	// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi
	leaq 48(%rdi), %rdi                 /* %rdi -> expanded key schedule */

	movdqu (%rdi), %xmm2                /* round key 0 (whitening) */
	movdqu 160(%rdi), %xmm12            /* round key 10 (final) */
	xor %eax, %eax                      /* %rax = bytes processed */
	// cmpq $8*16, %r9
	cmpq $128, %r9
	jb 1f                               /* fewer than 8 blocks: tail loop only */
	// leaq -8*16(%r9), %r11
	leaq -128(%r9), %r11                /* last offset with 8 full blocks left */
2:	movdqu (%r8, %rax), %xmm3           /* load 8 plaintext blocks */
	movdqu 16(%r8, %rax), %xmm4
	movdqu 32(%r8, %rax), %xmm5
	movdqu 48(%r8, %rax), %xmm6
	movdqu 64(%r8, %rax), %xmm7
	movdqu 80(%r8, %rax), %xmm8
	movdqu 96(%r8, %rax), %xmm9
	movdqu 112(%r8, %rax), %xmm10
	pxor %xmm2, %xmm3                   /* whitening with round key 0 */
	pxor %xmm2, %xmm4
	pxor %xmm2, %xmm5
	pxor %xmm2, %xmm6
	pxor %xmm2, %xmm7
	pxor %xmm2, %xmm8
	pxor %xmm2, %xmm9
	pxor %xmm2, %xmm10
	movq $16, %r10                      /* %r10 = key offset, rounds 1..9 */
3:	movdqu (%rdi, %r10), %xmm1
	.byte 0x66,0x0f,0x38,0xdc,0xd9      /* aesenc %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdc,0xe1      /* aesenc %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdc,0xe9      /* aesenc %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdc,0xf1      /* aesenc %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdc,0xf9      /* aesenc %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
	addq $16, %r10
	cmpq $160, %r10
	jne 3b
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xdc /* aesenclast %xmm12, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xe4 /* aesenclast %xmm12, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xec /* aesenclast %xmm12, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xf4 /* aesenclast %xmm12, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xfc /* aesenclast %xmm12, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xdd,0xc4 /* aesenclast %xmm12, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xdd,0xcc /* aesenclast %xmm12, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xdd,0xd4 /* aesenclast %xmm12, %xmm10 */
	movdqu %xmm3, (%rsi, %rax)          /* store 8 ciphertext blocks */
	movdqu %xmm4, 16(%rsi, %rax)
	movdqu %xmm5, 32(%rsi, %rax)
	movdqu %xmm6, 48(%rsi, %rax)
	movdqu %xmm7, 64(%rsi, %rax)
	movdqu %xmm8, 80(%rsi, %rax)
	movdqu %xmm9, 96(%rsi, %rax)
	movdqu %xmm10, 112(%rsi, %rax)
	// addq $8*16, %rax
	addq $128, %rax
	cmpq %r11, %rax
	jbe 2b
1:	cmpq %rax, %r9
	je 5f                               /* no tail blocks left */

	/* tail: keep round keys 1..9 in registers, one block per iteration */
	movdqu 16(%rdi), %xmm3
	movdqu 32(%rdi), %xmm4
	movdqu 48(%rdi), %xmm5
	movdqu 64(%rdi), %xmm6
	movdqu 80(%rdi), %xmm7
	movdqu 96(%rdi), %xmm8
	movdqu 112(%rdi), %xmm9
	movdqu 128(%rdi), %xmm10
	movdqu 144(%rdi), %xmm11

4:	movdqu (%r8, %rax), %xmm1
	pxor %xmm2, %xmm1                   /* whitening */
	.byte 0x66,0x0f,0x38,0xdc,0xcb      /* aesenc %xmm3, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcc      /* aesenc %xmm4, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcd      /* aesenc %xmm5, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xce      /* aesenc %xmm6, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcf      /* aesenc %xmm7, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xcc /* aesenclast %xmm12, %xmm1 */
	movdqu %xmm1, (%rsi, %rax)
	addq $16, %rax
	cmpq %rax, %r9
	jne 4b

5:	xor %eax, %eax                      /* return 0 (success) */
	ret
	.size intel_aes_encrypt_ecb_128, .-intel_aes_encrypt_ecb_128
|
|
|
|
|
|
/* intel_aes_decrypt_ecb_128 -- AES-128/ECB decrypt inputLen bytes.
   in %rdi : cx - context (InvMixColumns key schedule at byte offset 48)
   in %rsi : output - pointer to output buffer
   in %rdx : outputLen - pointer to variable for length of output
             (filled by caller)
   in %rcx : maxOutputLen - length of output buffer
   in %r8  : input - pointer to input buffer
   in %r9  : inputLen - length of input buffer
   on stack: blocksize - AES blocksize (always 16, unused)
   Returns 0 in %eax.  Mirrors the encrypt routine: 8-block main loop,
   then a 1-block tail.  Round keys are applied in reverse (key 10 first,
   key 0 last); the main loop walks %r10 from 144 down to 16.
   NOTE(review): several mnemonic comments on the .byte encodings below
   were wrong in the original; they have been corrected to match the
   actual ModRM bytes -- the machine code itself is unchanged. */
	.type intel_aes_decrypt_ecb_128,@function
	.globl intel_aes_decrypt_ecb_128
	.align 16
intel_aes_decrypt_ecb_128:
	// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi
	leaq 48(%rdi), %rdi                 /* %rdi -> expanded key schedule */

	movdqu (%rdi), %xmm2                /* round key 0 (applied last) */
	movdqu 160(%rdi), %xmm12            /* round key 10 (applied first) */
	xorl %eax, %eax                     /* %rax = bytes processed */
	// cmpq $8*16, %r9
	cmpq $128, %r9
	jb 1f
	// leaq -8*16(%r9), %r11
	leaq -128(%r9), %r11
2:	movdqu (%r8, %rax), %xmm3           /* load 8 ciphertext blocks */
	movdqu 16(%r8, %rax), %xmm4
	movdqu 32(%r8, %rax), %xmm5
	movdqu 48(%r8, %rax), %xmm6
	movdqu 64(%r8, %rax), %xmm7
	movdqu 80(%r8, %rax), %xmm8
	movdqu 96(%r8, %rax), %xmm9
	movdqu 112(%r8, %rax), %xmm10
	pxor %xmm12, %xmm3                  /* undo final-round key first */
	pxor %xmm12, %xmm4
	pxor %xmm12, %xmm5
	pxor %xmm12, %xmm6
	pxor %xmm12, %xmm7
	pxor %xmm12, %xmm8
	pxor %xmm12, %xmm9
	pxor %xmm12, %xmm10
	movq $144, %r10                     /* keys 9..1, walking downward */
3:	movdqu (%rdi, %r10), %xmm1
	.byte 0x66,0x0f,0x38,0xde,0xd9      /* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1      /* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9      /* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1      /* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9      /* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
	subq $16, %r10
	jne 3b                              /* stop when %r10 reaches 0 */
	.byte 0x66,0x0f,0x38,0xdf,0xda      /* aesdeclast %xmm2, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdf,0xe2      /* aesdeclast %xmm2, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdf,0xea      /* aesdeclast %xmm2, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdf,0xf2      /* aesdeclast %xmm2, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdf,0xfa      /* aesdeclast %xmm2, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */
	movdqu %xmm3, (%rsi, %rax)          /* store 8 plaintext blocks */
	movdqu %xmm4, 16(%rsi, %rax)
	movdqu %xmm5, 32(%rsi, %rax)
	movdqu %xmm6, 48(%rsi, %rax)
	movdqu %xmm7, 64(%rsi, %rax)
	movdqu %xmm8, 80(%rsi, %rax)
	movdqu %xmm9, 96(%rsi, %rax)
	movdqu %xmm10, 112(%rsi, %rax)
	// addq $8*16, %rax
	addq $128, %rax
	cmpq %r11, %rax
	jbe 2b
1:	cmpq %rax, %r9
	je 5f

	/* tail: round keys 1..9 held in %xmm3-%xmm11 */
	movdqu 16(%rdi), %xmm3
	movdqu 32(%rdi), %xmm4
	movdqu 48(%rdi), %xmm5
	movdqu 64(%rdi), %xmm6
	movdqu 80(%rdi), %xmm7
	movdqu 96(%rdi), %xmm8
	movdqu 112(%rdi), %xmm9
	movdqu 128(%rdi), %xmm10
	movdqu 144(%rdi), %xmm11

4:	movdqu (%r8, %rax), %xmm1
	pxor %xmm12, %xmm1
	.byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcf      /* aesdec %xmm7, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xce      /* aesdec %xmm6, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcd      /* aesdec %xmm5, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcc      /* aesdec %xmm4, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcb      /* aesdec %xmm3, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdf,0xca      /* aesdeclast %xmm2, %xmm1 */
	movdqu %xmm1, (%rsi, %rax)
	addq $16, %rax
	cmpq %rax, %r9
	jne 4b

5:	xor %eax, %eax                      /* return 0 (success) */
	ret
	.size intel_aes_decrypt_ecb_128, .-intel_aes_decrypt_ecb_128
|
|
|
|
|
|
/* intel_aes_encrypt_cbc_128 -- AES-128/CBC encrypt inputLen bytes.
   in %rdi : cx - context (IV at offset 16, expanded key at offset 48)
   in %rsi : output - pointer to output buffer
   in %rdx : outputLen - pointer to variable for length of output
             (filled by caller)
   in %rcx : maxOutputLen - length of output buffer
   in %r8  : input - pointer to input buffer
   in %r9  : inputLen - length of input buffer
   on stack: blocksize - AES blocksize (always 16, unused)
   Returns 0 in %eax and writes the last ciphertext block back to the
   context as the next IV.  CBC encryption is inherently serial (each
   block chains on the previous ciphertext), so there is no 8-block
   unrolled loop here; all 11 round keys stay in %xmm2-%xmm12.
   Note: %rdx is repurposed as the IV pointer, so outputLen is never
   written here -- presumably filled by the C caller (TODO confirm).
   NOTE(review): two mnemonic comments below said %xmma/%xmmb in the
   original; corrected to %xmm10/%xmm11 per the encodings. */
	.type intel_aes_encrypt_cbc_128,@function
	.globl intel_aes_encrypt_cbc_128
	.align 16
intel_aes_encrypt_cbc_128:
	testq %r9, %r9
	je 2f                               /* zero-length input: nothing to do */

	// leaq IV_OFFSET(%rdi), %rdx
	// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi
	leaq 16(%rdi), %rdx                 /* %rdx -> IV in the context */
	leaq 48(%rdi), %rdi                 /* %rdi -> expanded key */

	movdqu (%rdx), %xmm0                /* %xmm0 = chaining value (IV) */
	movdqu (%rdi), %xmm2                /* round keys 0..10 in %xmm2-%xmm12 */
	movdqu 16(%rdi), %xmm3
	movdqu 32(%rdi), %xmm4
	movdqu 48(%rdi), %xmm5
	movdqu 64(%rdi), %xmm6
	movdqu 80(%rdi), %xmm7
	movdqu 96(%rdi), %xmm8
	movdqu 112(%rdi), %xmm9
	movdqu 128(%rdi), %xmm10
	movdqu 144(%rdi), %xmm11
	movdqu 160(%rdi), %xmm12

	xorl %eax, %eax                     /* %rax = bytes processed */
1:	movdqu (%r8, %rax), %xmm1
	pxor %xmm0, %xmm1                   /* chain: plaintext ^ previous ciphertext */
	pxor %xmm2, %xmm1                   /* whitening with round key 0 */
	.byte 0x66,0x0f,0x38,0xdc,0xcb      /* aesenc %xmm3, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcc      /* aesenc %xmm4, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcd      /* aesenc %xmm5, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xce      /* aesenc %xmm6, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcf      /* aesenc %xmm7, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xcc /* aesenclast %xmm12, %xmm1 */
	movdqu %xmm1, (%rsi, %rax)
	movdqa %xmm1, %xmm0                 /* ciphertext becomes next chaining value */
	addq $16, %rax
	cmpq %rax, %r9
	jne 1b

	movdqu %xmm0, (%rdx)                /* persist last block as next IV */

2:	xor %eax, %eax                      /* return 0 (success) */
	ret
	.size intel_aes_encrypt_cbc_128, .-intel_aes_encrypt_cbc_128
|
|
|
|
|
|
/* intel_aes_decrypt_cbc_128 -- AES-128/CBC decrypt inputLen bytes.
   in %rdi : cx - context (IV at offset 16, expanded key at offset 48)
   in %rsi : output - pointer to output buffer
   in %rdx : outputLen - pointer to variable for length of output
             (filled by caller)
   in %rcx : maxOutputLen - length of output buffer
   in %r8  : input - pointer to input buffer
   in %r9  : inputLen - length of input buffer
   on stack: blocksize - AES blocksize (always 16, unused)
   Returns 0 in %eax and stores the last ciphertext block back into the
   context as the next IV.  Unlike CBC encryption, decryption of each
   block is independent, so an 8-block unrolled loop is used; the
   chaining XOR is done afterwards by re-reading the ciphertext from the
   input buffer.  This implies input and output buffers must not overlap
   destructively (in-place decryption would reread overwritten data) --
   NOTE(review): confirm the caller guarantees this. */
	.type intel_aes_decrypt_cbc_128,@function
	.globl intel_aes_decrypt_cbc_128
	.align 16
intel_aes_decrypt_cbc_128:
	leaq 16(%rdi), %rdx                 /* iv */
	leaq 48(%rdi), %rdi                 /* expanded key */

	movdqu (%rdx), %xmm0                /* iv */
	movdqu (%rdi), %xmm2                /* first key block */
	movdqu 160(%rdi), %xmm12            /* last key block */
	xorl %eax, %eax                     /* %rax = bytes processed */
	cmpq $128, %r9
	jb 1f
	leaq -128(%r9), %r11
2:	movdqu (%r8, %rax), %xmm3           /* 1st data block */
	movdqu 16(%r8, %rax), %xmm4         /* 2nd data block */
	movdqu 32(%r8, %rax), %xmm5
	movdqu 48(%r8, %rax), %xmm6
	movdqu 64(%r8, %rax), %xmm7
	movdqu 80(%r8, %rax), %xmm8
	movdqu 96(%r8, %rax), %xmm9
	movdqu 112(%r8, %rax), %xmm10
	pxor %xmm12, %xmm3                  /* undo final-round key */
	pxor %xmm12, %xmm4
	pxor %xmm12, %xmm5
	pxor %xmm12, %xmm6
	pxor %xmm12, %xmm7
	pxor %xmm12, %xmm8
	pxor %xmm12, %xmm9
	pxor %xmm12, %xmm10
	movq $144, %r10                     /* keys 9..1, walking downward */
3:	movdqu (%rdi, %r10), %xmm1          /* n-th block of the key */
	.byte 0x66,0x0f,0x38,0xde,0xd9      /* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1      /* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9      /* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1      /* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9      /* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
	subq $16, %r10
	jne 3b
	.byte 0x66,0x0f,0x38,0xdf,0xda      /* aesdeclast %xmm2, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdf,0xe2      /* aesdeclast %xmm2, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdf,0xea      /* aesdeclast %xmm2, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdf,0xf2      /* aesdeclast %xmm2, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdf,0xfa      /* aesdeclast %xmm2, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */
	/* CBC chaining: XOR each decrypted block with the previous
	   ciphertext block, re-read from the input buffer.  %xmm0 ends
	   up holding the 8th ciphertext block for the next iteration. */
	pxor %xmm0, %xmm3
	movdqu (%r8, %rax), %xmm0
	pxor %xmm0, %xmm4
	movdqu 16(%r8, %rax), %xmm0
	pxor %xmm0, %xmm5
	movdqu 32(%r8, %rax), %xmm0
	pxor %xmm0, %xmm6
	movdqu 48(%r8, %rax), %xmm0
	pxor %xmm0, %xmm7
	movdqu 64(%r8, %rax), %xmm0
	pxor %xmm0, %xmm8
	movdqu 80(%r8, %rax), %xmm0
	pxor %xmm0, %xmm9
	movdqu 96(%r8, %rax), %xmm0
	pxor %xmm0, %xmm10
	movdqu 112(%r8, %rax), %xmm0        /* last ciphertext = next chaining value */
	movdqu %xmm3, (%rsi, %rax)
	movdqu %xmm4, 16(%rsi, %rax)
	movdqu %xmm5, 32(%rsi, %rax)
	movdqu %xmm6, 48(%rsi, %rax)
	movdqu %xmm7, 64(%rsi, %rax)
	movdqu %xmm8, 80(%rsi, %rax)
	movdqu %xmm9, 96(%rsi, %rax)
	movdqu %xmm10, 112(%rsi, %rax)
	addq $128, %rax
	cmpq %r11, %rax
	jbe 2b
1:	cmpq %rax, %r9
	je 5f

	/* tail: round keys 1..9 held in %xmm3-%xmm11 */
	movdqu 16(%rdi), %xmm3
	movdqu 32(%rdi), %xmm4
	movdqu 48(%rdi), %xmm5
	movdqu 64(%rdi), %xmm6
	movdqu 80(%rdi), %xmm7
	movdqu 96(%rdi), %xmm8
	movdqu 112(%rdi), %xmm9
	movdqu 128(%rdi), %xmm10
	movdqu 144(%rdi), %xmm11

4:	movdqu (%r8, %rax), %xmm1
	movdqa %xmm1, %xmm13                /* save ciphertext: next chaining value */
	pxor %xmm12, %xmm1
	.byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcf      /* aesdec %xmm7, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xce      /* aesdec %xmm6, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcd      /* aesdec %xmm5, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcc      /* aesdec %xmm4, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcb      /* aesdec %xmm3, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdf,0xca      /* aesdeclast %xmm2, %xmm1 */
	pxor %xmm0, %xmm1                   /* chaining XOR */
	movdqu %xmm1, (%rsi, %rax)
	movdqa %xmm13, %xmm0
	addq $16, %rax
	cmpq %rax, %r9
	jne 4b

5:	movdqu %xmm0, (%rdx)                /* persist next IV */

	xor %eax, %eax                      /* return 0 (success) */
	ret
	.size intel_aes_decrypt_cbc_128, .-intel_aes_decrypt_cbc_128
|
|
|
|
/* intel_aes_encrypt_init_192 -- build the AES-192 encryption key schedule.
   in %rdi : the key (24 bytes)
   in %rsi : buffer for expanded key (13 round keys = 208 bytes)
   Clobbers %eax, %rsi, %xmm1-%xmm5.  The 24-byte key state is kept as
   %xmm1 (low 16 bytes) + %xmm3 (high 8 bytes); each key_expansion192
   call advances %rsi by 24 bytes.  Eight round constants 0x01..0x80
   suffice for AES-192. */
	.type intel_aes_encrypt_init_192,@function
	.globl intel_aes_encrypt_init_192
	.align 16
intel_aes_encrypt_init_192:
	movdqu (%rdi), %xmm1                /* low 16 key bytes */
	movq 16(%rdi), %xmm3                /* high 8 key bytes */
	movdqu %xmm1, (%rsi)
	movq %xmm3, 16(%rsi)
	leaq 24(%rsi), %rsi

	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01 /* aeskeygenassist $0x01, %xmm3, %xmm2 */
	call key_expansion192
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02 /* aeskeygenassist $0x02, %xmm3, %xmm2 */
	call key_expansion192
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04 /* aeskeygenassist $0x04, %xmm3, %xmm2 */
	call key_expansion192
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08 /* aeskeygenassist $0x08, %xmm3, %xmm2 */
	call key_expansion192
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10 /* aeskeygenassist $0x10, %xmm3, %xmm2 */
	call key_expansion192
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20 /* aeskeygenassist $0x20, %xmm3, %xmm2 */
	call key_expansion192
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40 /* aeskeygenassist $0x40, %xmm3, %xmm2 */
	call key_expansion192
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x80 /* aeskeygenassist $0x80, %xmm3, %xmm2 */
	call key_expansion192

	ret
	.size intel_aes_encrypt_init_192, .-intel_aes_encrypt_init_192
|
|
|
|
|
|
/* intel_aes_decrypt_init_192 -- build the AES-192 decryption key schedule.
   in %rdi : the key (24 bytes)
   in %rsi : buffer for expanded key (13 round keys = 208 bytes)
   Clobbers %eax, %rsi, %xmm1-%xmm5.
   Like the 128-bit variant this applies AESIMC (InvMixColumns) to the
   intermediate round keys, but because key_expansion192 emits 24 bytes
   per step while round keys are 16 bytes, the fix-up alternates between
   transforming two 16-byte slots (-32/-16(%rsi)) and one unaligned slot
   (-24(%rsi)).  Round keys 0 and 12 are left untouched. */
	.type intel_aes_decrypt_init_192,@function
	.globl intel_aes_decrypt_init_192
	.align 16
intel_aes_decrypt_init_192:
	movdqu (%rdi), %xmm1                /* low 16 key bytes */
	movq 16(%rdi), %xmm3                /* high 8 key bytes */
	movdqu %xmm1, (%rsi)
	movq %xmm3, 16(%rsi)
	leaq 24(%rsi), %rsi

	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01 /* aeskeygenassist $0x01, %xmm3, %xmm2 */
	call key_expansion192
	movups -32(%rsi), %xmm2             /* InvMixColumns two finished round keys */
	movups -16(%rsi), %xmm4
	.byte 0x66,0x0f,0x38,0xdb,0xd2      /* aesimc %xmm2, %xmm2 */
	.byte 0x66,0x0f,0x38,0xdb,0xe4      /* aesimc %xmm4, %xmm4 */
	movups %xmm2, -32(%rsi)
	movups %xmm4, -16(%rsi)
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02 /* aeskeygenassist $0x02, %xmm3, %xmm2 */
	call key_expansion192
	.byte 0x66,0x0f,0x38,0xdb,0xd1      /* aesimc %xmm1, %xmm2 */
	movups %xmm2, -24(%rsi)             /* straddling round key */
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04 /* aeskeygenassist $0x04, %xmm3, %xmm2 */
	call key_expansion192
	movups -32(%rsi), %xmm2
	movups -16(%rsi), %xmm4
	.byte 0x66,0x0f,0x38,0xdb,0xd2      /* aesimc %xmm2, %xmm2 */
	.byte 0x66,0x0f,0x38,0xdb,0xe4      /* aesimc %xmm4, %xmm4 */
	movups %xmm2, -32(%rsi)
	movups %xmm4, -16(%rsi)
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08 /* aeskeygenassist $0x08, %xmm3, %xmm2 */
	call key_expansion192
	.byte 0x66,0x0f,0x38,0xdb,0xd1      /* aesimc %xmm1, %xmm2 */
	movups %xmm2, -24(%rsi)
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10 /* aeskeygenassist $0x10, %xmm3, %xmm2 */
	call key_expansion192
	movups -32(%rsi), %xmm2
	movups -16(%rsi), %xmm4
	.byte 0x66,0x0f,0x38,0xdb,0xd2      /* aesimc %xmm2, %xmm2 */
	.byte 0x66,0x0f,0x38,0xdb,0xe4      /* aesimc %xmm4, %xmm4 */
	movups %xmm2, -32(%rsi)
	movups %xmm4, -16(%rsi)
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20 /* aeskeygenassist $0x20, %xmm3, %xmm2 */
	call key_expansion192
	.byte 0x66,0x0f,0x38,0xdb,0xd1      /* aesimc %xmm1, %xmm2 */
	movups %xmm2, -24(%rsi)
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40 /* aeskeygenassist $0x40, %xmm3, %xmm2 */
	call key_expansion192
	movups -32(%rsi), %xmm2
	movups -16(%rsi), %xmm4
	.byte 0x66,0x0f,0x38,0xdb,0xd2      /* aesimc %xmm2, %xmm2 */
	.byte 0x66,0x0f,0x38,0xdb,0xe4      /* aesimc %xmm4, %xmm4 */
	movups %xmm2, -32(%rsi)
	movups %xmm4, -16(%rsi)
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x80 /* aeskeygenassist $0x80, %xmm3, %xmm2 */
	call key_expansion192               /* final round key: no aesimc */

	ret
	.size intel_aes_decrypt_init_192, .-intel_aes_decrypt_init_192
|
|
|
|
|
|
/* key_expansion192 -- one AES-192 key-schedule step (internal helper).
   In:  %xmm1 = low 16 bytes of key state, %xmm3 = high 8 bytes
        %xmm2 = aeskeygenassist result for this round
        %rsi  = destination for the next 24 schedule bytes
   Out: %xmm1/%xmm3 updated; 16+8 bytes stored; %rsi advanced by 24.
   Clobbers %eax (zeroed here, unlike key_expansion128), %xmm4, %xmm5.
   First half mirrors the 128-bit expansion on %xmm1; second half derives
   the two extra words in %xmm3 from the new %xmm1's last word. */
	.type key_expansion192,@function
	.align 16
key_expansion192:
	pshufd $0x55, %xmm2, %xmm2          /* broadcast keygenassist word 1 */
	xor %eax, %eax
	movd %eax, %xmm4                    /* zero lane for the slides */
	shufps $0x10, %xmm1, %xmm4
	pxor %xmm4, %xmm1
	shufps $0x8c, %xmm1, %xmm4          /* cumulative word XORs */
	pxor %xmm2, %xmm1                   /* mix in Rcon/SubWord value */
	pxor %xmm4, %xmm1
	movdqu %xmm1, (%rsi)                /* emit 16 schedule bytes */
	addq $16, %rsi

	pshufd $0xff, %xmm1, %xmm4          /* broadcast last word of new %xmm1 */
	movd %eax, %xmm5
	shufps $0x00, %xmm3, %xmm5
	shufps $0x08, %xmm3, %xmm5
	pxor %xmm4, %xmm3
	pxor %xmm5, %xmm3
	movq %xmm3, (%rsi)                  /* emit remaining 8 schedule bytes */
	addq $8, %rsi
	ret
	.size key_expansion192, .-key_expansion192
|
|
|
|
|
|
/* intel_aes_encrypt_ecb_192 -- AES-192/ECB encrypt inputLen bytes.
   in %rdi : cx - context (expanded key at byte offset 48)
   in %rsi : output - pointer to output buffer
   in %rdx : outputLen - pointer to variable for length of output
             (filled by caller)
   in %rcx : maxOutputLen - length of output buffer
   in %r8  : input - pointer to input buffer
   in %r9  : inputLen - length of input buffer
   on stack: blocksize - AES blocksize (always 16, unused)
   Returns 0 in %eax.  Same structure as the 128-bit routine but with
   12 rounds: the main loop walks keys 1..11 (%r10 = 16..176) and the
   final round key sits at offset 192 in %xmm14.
   NOTE(review): the mnemonic comments for two aesenclast encodings were
   wrong in the original (said %xmm7/%xmm3); corrected below to the
   registers the ModRM bytes actually encode.  Machine code unchanged. */
	.type intel_aes_encrypt_ecb_192,@function
	.globl intel_aes_encrypt_ecb_192
	.align 16
intel_aes_encrypt_ecb_192:
	// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi
	leaq 48(%rdi), %rdi                 /* %rdi -> expanded key schedule */

	movdqu (%rdi), %xmm2                /* round key 0 (whitening) */
	movdqu 192(%rdi), %xmm14            /* round key 12 (final) */
	xorl %eax, %eax                     /* %rax = bytes processed */
	// cmpq $8*16, %r9
	cmpq $128, %r9
	jb 1f
	// leaq -8*16(%r9), %r11
	leaq -128(%r9), %r11
2:	movdqu (%r8, %rax), %xmm3           /* load 8 plaintext blocks */
	movdqu 16(%r8, %rax), %xmm4
	movdqu 32(%r8, %rax), %xmm5
	movdqu 48(%r8, %rax), %xmm6
	movdqu 64(%r8, %rax), %xmm7
	movdqu 80(%r8, %rax), %xmm8
	movdqu 96(%r8, %rax), %xmm9
	movdqu 112(%r8, %rax), %xmm10
	pxor %xmm2, %xmm3                   /* whitening with round key 0 */
	pxor %xmm2, %xmm4
	pxor %xmm2, %xmm5
	pxor %xmm2, %xmm6
	pxor %xmm2, %xmm7
	pxor %xmm2, %xmm8
	pxor %xmm2, %xmm9
	pxor %xmm2, %xmm10
	movq $16, %r10                      /* keys 1..11 */
3:	movdqu (%rdi, %r10), %xmm1
	.byte 0x66,0x0f,0x38,0xdc,0xd9      /* aesenc %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdc,0xe1      /* aesenc %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdc,0xe9      /* aesenc %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdc,0xf1      /* aesenc %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdc,0xf9      /* aesenc %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
	addq $16, %r10
	cmpq $192, %r10
	jne 3b
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xde /* aesenclast %xmm14, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xe6 /* aesenclast %xmm14, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xee /* aesenclast %xmm14, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xf6 /* aesenclast %xmm14, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xfe /* aesenclast %xmm14, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xdd,0xc6 /* aesenclast %xmm14, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xdd,0xce /* aesenclast %xmm14, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xdd,0xd6 /* aesenclast %xmm14, %xmm10 */
	movdqu %xmm3, (%rsi, %rax)          /* store 8 ciphertext blocks */
	movdqu %xmm4, 16(%rsi, %rax)
	movdqu %xmm5, 32(%rsi, %rax)
	movdqu %xmm6, 48(%rsi, %rax)
	movdqu %xmm7, 64(%rsi, %rax)
	movdqu %xmm8, 80(%rsi, %rax)
	movdqu %xmm9, 96(%rsi, %rax)
	movdqu %xmm10, 112(%rsi, %rax)
	// addq $8*16, %rax
	addq $128, %rax
	cmpq %r11, %rax
	jbe 2b
1:	cmpq %rax, %r9
	je 5f

	/* tail: keys 1..11 held in %xmm3-%xmm13 */
	movdqu 16(%rdi), %xmm3
	movdqu 32(%rdi), %xmm4
	movdqu 48(%rdi), %xmm5
	movdqu 64(%rdi), %xmm6
	movdqu 80(%rdi), %xmm7
	movdqu 96(%rdi), %xmm8
	movdqu 112(%rdi), %xmm9
	movdqu 128(%rdi), %xmm10
	movdqu 144(%rdi), %xmm11
	movdqu 160(%rdi), %xmm12
	movdqu 176(%rdi), %xmm13

4:	movdqu (%r8, %rax), %xmm1
	pxor %xmm2, %xmm1                   /* whitening */
	.byte 0x66,0x0f,0x38,0xdc,0xcb      /* aesenc %xmm3, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcc      /* aesenc %xmm4, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcd      /* aesenc %xmm5, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xce      /* aesenc %xmm6, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcf      /* aesenc %xmm7, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcc /* aesenc %xmm12, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcd /* aesenc %xmm13, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xce /* aesenclast %xmm14, %xmm1 */
	movdqu %xmm1, (%rsi, %rax)
	addq $16, %rax
	cmpq %rax, %r9
	jne 4b

5:	xor %eax, %eax                      /* return 0 (success) */
	ret
	.size intel_aes_encrypt_ecb_192, .-intel_aes_encrypt_ecb_192
|
|
|
|
|
|
/* intel_aes_decrypt_ecb_192 -- AES-192/ECB decrypt inputLen bytes.
   in %rdi : cx - context (InvMixColumns key schedule at byte offset 48)
   in %rsi : output - pointer to output buffer
   in %rdx : outputLen - pointer to variable for length of output
             (filled by caller)
   in %rcx : maxOutputLen - length of output buffer
   in %r8  : input - pointer to input buffer
   in %r9  : inputLen - length of input buffer
   on stack: blocksize - AES blocksize (always 16, unused)
   Returns 0 in %eax.  12-round inverse cipher: round keys applied in
   reverse, main loop walks %r10 from 176 down to 16; key 12 (offset 192)
   is the initial XOR, key 0 feeds aesdeclast. */
	.type intel_aes_decrypt_ecb_192,@function
	.globl intel_aes_decrypt_ecb_192
	.align 16
intel_aes_decrypt_ecb_192:
	// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi
	leaq 48(%rdi), %rdi                 /* %rdi -> expanded key schedule */

	movdqu (%rdi), %xmm2                /* round key 0 (applied last) */
	movdqu 192(%rdi), %xmm14            /* round key 12 (applied first) */
	xorl %eax, %eax                     /* %rax = bytes processed */
	// cmpq $8*16, %r9
	cmpq $128, %r9
	jb 1f
	// leaq -8*16(%r9), %r11
	leaq -128(%r9), %r11
2:	movdqu (%r8, %rax), %xmm3           /* load 8 ciphertext blocks */
	movdqu 16(%r8, %rax), %xmm4
	movdqu 32(%r8, %rax), %xmm5
	movdqu 48(%r8, %rax), %xmm6
	movdqu 64(%r8, %rax), %xmm7
	movdqu 80(%r8, %rax), %xmm8
	movdqu 96(%r8, %rax), %xmm9
	movdqu 112(%r8, %rax), %xmm10
	pxor %xmm14, %xmm3                  /* undo final-round key */
	pxor %xmm14, %xmm4
	pxor %xmm14, %xmm5
	pxor %xmm14, %xmm6
	pxor %xmm14, %xmm7
	pxor %xmm14, %xmm8
	pxor %xmm14, %xmm9
	pxor %xmm14, %xmm10
	movq $176, %r10                     /* keys 11..1, walking downward */
3:	movdqu (%rdi, %r10), %xmm1
	.byte 0x66,0x0f,0x38,0xde,0xd9      /* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1      /* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9      /* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1      /* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9      /* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
	subq $16, %r10
	jne 3b                              /* stop when %r10 reaches 0 */
	.byte 0x66,0x0f,0x38,0xdf,0xda      /* aesdeclast %xmm2, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdf,0xe2      /* aesdeclast %xmm2, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdf,0xea      /* aesdeclast %xmm2, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdf,0xf2      /* aesdeclast %xmm2, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdf,0xfa      /* aesdeclast %xmm2, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */
	movdqu %xmm3, (%rsi, %rax)          /* store 8 plaintext blocks */
	movdqu %xmm4, 16(%rsi, %rax)
	movdqu %xmm5, 32(%rsi, %rax)
	movdqu %xmm6, 48(%rsi, %rax)
	movdqu %xmm7, 64(%rsi, %rax)
	movdqu %xmm8, 80(%rsi, %rax)
	movdqu %xmm9, 96(%rsi, %rax)
	movdqu %xmm10, 112(%rsi, %rax)
	// addq $8*16, %rax
	addq $128, %rax
	cmpq %r11, %rax
	jbe 2b
1:	cmpq %rax, %r9
	je 5f

	/* tail: keys 1..11 held in %xmm3-%xmm13 */
	movdqu 16(%rdi), %xmm3
	movdqu 32(%rdi), %xmm4
	movdqu 48(%rdi), %xmm5
	movdqu 64(%rdi), %xmm6
	movdqu 80(%rdi), %xmm7
	movdqu 96(%rdi), %xmm8
	movdqu 112(%rdi), %xmm9
	movdqu 128(%rdi), %xmm10
	movdqu 144(%rdi), %xmm11
	movdqu 160(%rdi), %xmm12
	movdqu 176(%rdi), %xmm13

4:	movdqu (%r8, %rax), %xmm1
	pxor %xmm14, %xmm1
	.byte 0x66,0x41,0x0f,0x38,0xde,0xcd /* aesdec %xmm13, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xcc /* aesdec %xmm12, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcf      /* aesdec %xmm7, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xce      /* aesdec %xmm6, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcd      /* aesdec %xmm5, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcc      /* aesdec %xmm4, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcb      /* aesdec %xmm3, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdf,0xca      /* aesdeclast %xmm2, %xmm1 */
	movdqu %xmm1, (%rsi, %rax)
	addq $16, %rax
	cmpq %rax, %r9
	jne 4b

5:	xor %eax, %eax                      /* return 0 (success) */
	ret
	.size intel_aes_decrypt_ecb_192, .-intel_aes_decrypt_ecb_192
|
|
|
|
|
|
/* in %rdi : cx - context
|
|
in %rsi : output - pointer to output buffer
|
|
in %rdx : outputLen - pointer to variable for length of output
|
|
(filled by caller)
|
|
in %rcx : maxOutputLen - length of output buffer
|
|
in %r8 : input - pointer to input buffer
|
|
in %r9 : inputLen - length of input buffer
|
|
on stack: blocksize - AES blocksize (always 16, unused)
|
|
*/
|
|
/* AES-192 CBC encryption, one 16-byte block at a time (CBC chaining is
   inherently serial on the encrypt side, so no 8-block pipeline here).
   SysV AMD64 ABI; args documented in the comment block above.
   Context layout (per the commented-out IV_OFFSET/EXPANDED_KEY_OFFSET
   lines): IV at cx+16, expanded key schedule at cx+48.
   Round keys are held in %xmm2..%xmm14 (13 keys for AES-192); %xmm0
   carries the chaining value, %xmm1 the block being encrypted.
   Returns 0 in %eax.  AES-NI opcodes are hand-encoded with .byte for
   assemblers that predate the instructions. */
.type intel_aes_encrypt_cbc_192,@function
.globl intel_aes_encrypt_cbc_192
.align 16
intel_aes_encrypt_cbc_192:
	testq %r9, %r9			/* zero-length input: nothing to do */
	je 2f

	// leaq IV_OFFSET(%rdi), %rdx
	// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi
	leaq 16(%rdi), %rdx		/* %rdx = &cx->iv */
	leaq 48(%rdi), %rdi		/* %rdi = &cx->expanded_key */

	movdqu (%rdx), %xmm0		/* %xmm0 = IV / running chain value */
	movdqu (%rdi), %xmm2		/* round keys 0..12 -> %xmm2..%xmm14 */
	movdqu 16(%rdi), %xmm3
	movdqu 32(%rdi), %xmm4
	movdqu 48(%rdi), %xmm5
	movdqu 64(%rdi), %xmm6
	movdqu 80(%rdi), %xmm7
	movdqu 96(%rdi), %xmm8
	movdqu 112(%rdi), %xmm9
	movdqu 128(%rdi), %xmm10
	movdqu 144(%rdi), %xmm11
	movdqu 160(%rdi), %xmm12
	movdqu 176(%rdi), %xmm13
	movdqu 192(%rdi), %xmm14

	xorl %eax, %eax			/* %rax = byte offset into input/output */
1:	movdqu (%r8, %rax), %xmm1	/* load plaintext block */
	pxor %xmm0, %xmm1		/* CBC: xor with previous ciphertext/IV */
	pxor %xmm2, %xmm1		/* round 0: AddRoundKey */
	.byte 0x66,0x0f,0x38,0xdc,0xcb /* aesenc %xmm3, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcc /* aesenc %xmm4, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcd /* aesenc %xmm5, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xce /* aesenc %xmm6, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcf /* aesenc %xmm7, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcc /* aesenc %xmm12, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcd /* aesenc %xmm13, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xce /* aesenclast %xmm14, %xmm1 */
	movdqu %xmm1, (%rsi, %rax)	/* store ciphertext block */
	movdqa %xmm1, %xmm0		/* ciphertext becomes next chain value */
	addq $16, %rax
	cmpq %rax, %r9
	jne 1b

	movdqu %xmm0, (%rdx)		/* write final chain value back as IV */

2:	xor %eax, %eax
	ret
.size intel_aes_encrypt_cbc_192, .-intel_aes_encrypt_cbc_192
|
|
|
|
|
|
/* in %rdi : cx - context
|
|
in %rsi : output - pointer to output buffer
|
|
in %rdx : outputLen - pointer to variable for length of output
|
|
(filled by caller)
|
|
in %rcx : maxOutputLen - length of output buffer
|
|
in %r8 : input - pointer to input buffer
|
|
in %r9 : inputLen - length of input buffer
|
|
on stack: blocksize - AES blocksize (always 16, unused)
|
|
*/
|
|
/* AES-192 CBC decryption.  Unlike CBC encryption, decryption of distinct
   blocks is independent, so the bulk loop pipelines 8 blocks at a time
   through the AES rounds (%xmm3..%xmm10), then falls back to a one-block
   loop for the remainder.
   Context layout: IV at cx+16, expanded (decryption) key at cx+48;
   key schedule is applied in reverse: round key 12 (offset 192, %xmm14)
   first, then offsets 176..16 counted down via %r10, key 0 (%xmm2) last.
   %xmm0 carries the previous ciphertext block (IV chain).
   Returns 0 in %eax. */
.type intel_aes_decrypt_cbc_192,@function
.globl intel_aes_decrypt_cbc_192
.align 16
intel_aes_decrypt_cbc_192:
	leaq 16(%rdi), %rdx		/* %rdx = &cx->iv */
	leaq 48(%rdi), %rdi		/* %rdi = &cx->expanded_key */

	movdqu (%rdx), %xmm0		/* %xmm0 = IV */
	movdqu (%rdi), %xmm2		/* key 0 (used by aesdeclast) */
	movdqu 192(%rdi), %xmm14	/* key 12 (first decryption round) */
	xorl %eax, %eax			/* %rax = byte offset */
	cmpq $128, %r9			/* fewer than 8 blocks? skip bulk loop */
	jb 1f
	leaq -128(%r9), %r11		/* %r11 = last offset with 8 full blocks */
2:	movdqu (%r8, %rax), %xmm3	/* load 8 ciphertext blocks */
	movdqu 16(%r8, %rax), %xmm4
	movdqu 32(%r8, %rax), %xmm5
	movdqu 48(%r8, %rax), %xmm6
	movdqu 64(%r8, %rax), %xmm7
	movdqu 80(%r8, %rax), %xmm8
	movdqu 96(%r8, %rax), %xmm9
	movdqu 112(%r8, %rax), %xmm10
	pxor %xmm14, %xmm3		/* initial AddRoundKey for all 8 blocks */
	pxor %xmm14, %xmm4
	pxor %xmm14, %xmm5
	pxor %xmm14, %xmm6
	pxor %xmm14, %xmm7
	pxor %xmm14, %xmm8
	pxor %xmm14, %xmm9
	pxor %xmm14, %xmm10
	movq $176, %r10			/* walk keys 11..1 (offsets 176..16) */
3:	movdqu (%rdi, %r10), %xmm1
	.byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
	subq $16, %r10
	jne 3b
	.byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */
	/* CBC un-chaining: xor each decrypted block with the previous
	   ciphertext block, re-read from the input buffer (safe even if
	   output overwrites input, since inputs are re-read before the
	   corresponding stores below). */
	pxor %xmm0, %xmm3
	movdqu (%r8, %rax), %xmm0
	pxor %xmm0, %xmm4
	movdqu 16(%r8, %rax), %xmm0
	pxor %xmm0, %xmm5
	movdqu 32(%r8, %rax), %xmm0
	pxor %xmm0, %xmm6
	movdqu 48(%r8, %rax), %xmm0
	pxor %xmm0, %xmm7
	movdqu 64(%r8, %rax), %xmm0
	pxor %xmm0, %xmm8
	movdqu 80(%r8, %rax), %xmm0
	pxor %xmm0, %xmm9
	movdqu 96(%r8, %rax), %xmm0
	pxor %xmm0, %xmm10
	movdqu 112(%r8, %rax), %xmm0	/* last ciphertext = next chain value */
	movdqu %xmm3, (%rsi, %rax)	/* store 8 plaintext blocks */
	movdqu %xmm4, 16(%rsi, %rax)
	movdqu %xmm5, 32(%rsi, %rax)
	movdqu %xmm6, 48(%rsi, %rax)
	movdqu %xmm7, 64(%rsi, %rax)
	movdqu %xmm8, 80(%rsi, %rax)
	movdqu %xmm9, 96(%rsi, %rax)
	movdqu %xmm10, 112(%rsi, %rax)
	addq $128, %rax
	cmpq %r11, %rax
	jbe 2b
1:	cmpq %rax, %r9			/* anything left over? */
	je 5f

	movdqu 16(%rdi), %xmm3		/* keys 1..11 -> %xmm3..%xmm13 */
	movdqu 32(%rdi), %xmm4
	movdqu 48(%rdi), %xmm5
	movdqu 64(%rdi), %xmm6
	movdqu 80(%rdi), %xmm7
	movdqu 96(%rdi), %xmm8
	movdqu 112(%rdi), %xmm9
	movdqu 128(%rdi), %xmm10
	movdqu 144(%rdi), %xmm11
	movdqu 160(%rdi), %xmm12
	movdqu 176(%rdi), %xmm13

4:	movdqu (%r8, %rax), %xmm1	/* single-block tail loop */
	movdqa %xmm1, %xmm15		/* keep ciphertext: next chain value
					   (works when input buf == output buf) */
	pxor %xmm14, %xmm1
	.byte 0x66,0x41,0x0f,0x38,0xde,0xcd /* aesdec %xmm13, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xcc /* aesdec %xmm12, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm1 */
	pxor %xmm0, %xmm1		/* un-chain with previous ciphertext */
	movdqu %xmm1, (%rsi, %rax)
	movdqa %xmm15, %xmm0		/* advance chain value */
	addq $16, %rax
	cmpq %rax, %r9
	jne 4b

5:	movdqu %xmm0, (%rdx)		/* write final chain value back as IV */

	xor %eax, %eax
	ret
.size intel_aes_decrypt_cbc_192, .-intel_aes_decrypt_cbc_192
|
|
|
|
/* in %rdi : the key
|
|
in %rsi : buffer for expanded key
|
|
*/
|
|
/* AES-256 encryption key schedule.
   In:  %rdi = 32-byte user key; %rsi = output buffer for the 15 round
        keys (15 * 16 = 240 bytes).
   The first two round keys are the raw key halves (%xmm1, %xmm3).  Each
   aeskeygenassist/call pair produces two more round keys via
   key_expansion256 (which also advances %rsi by 32 and expects %eax = 0
   for its movd-based zeroing).  After rcon 0x40 only ONE more round key
   is needed (15 total), so the final expansion is done inline: pshufd
   broadcasts the SubWord/RotWord result, and the shufps/pxor sequence
   computes the running xor of the previous round key's words without a
   scratch load. */
.type intel_aes_encrypt_init_256,@function
.globl intel_aes_encrypt_init_256
.align 16
intel_aes_encrypt_init_256:
	movdqu (%rdi), %xmm1		/* key low half  = round key 0 */
	movdqu 16(%rdi), %xmm3		/* key high half = round key 1 */
	movdqu %xmm1, (%rsi)
	movdqu %xmm3, 16(%rsi)
	leaq 32(%rsi), %rsi
	xor %eax, %eax			/* key_expansion256 requires %eax = 0 */

	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01 /* aeskeygenassist $0x01, %xmm3, %xmm2 */
	call key_expansion256
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02 /* aeskeygenassist $0x02, %xmm3, %xmm2 */
	call key_expansion256
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04 /* aeskeygenassist $0x04, %xmm3, %xmm2 */
	call key_expansion256
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08 /* aeskeygenassist $0x08, %xmm3, %xmm2 */
	call key_expansion256
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10 /* aeskeygenassist $0x10, %xmm3, %xmm2 */
	call key_expansion256
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20 /* aeskeygenassist $0x20, %xmm3, %xmm2 */
	call key_expansion256
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40 /* aeskeygenassist $0x40, %xmm3, %xmm2 */
	/* final (15th) round key, expanded inline */
	pxor %xmm6, %xmm6		/* zero scratch */
	pshufd $0xff, %xmm2, %xmm2	/* broadcast keygenassist word */
	shufps $0x10, %xmm1, %xmm6	/* build sliding xor of prev key words */
	pxor %xmm6, %xmm1
	shufps $0x8c, %xmm1, %xmm6
	pxor %xmm2, %xmm1
	pxor %xmm6, %xmm1
	movdqu %xmm1, (%rsi)

	ret
.size intel_aes_encrypt_init_256, .-intel_aes_encrypt_init_256
|
|
|
|
|
|
/* in %rdi : the key
|
|
in %rsi : buffer for expanded key
|
|
*/
|
|
/* AES-256 decryption key schedule.
   In:  %rdi = 32-byte user key; %rsi = output buffer for 15 round keys.
   Same expansion as intel_aes_encrypt_init_256, but for use with
   aesdec (Equivalent Inverse Cipher) the middle round keys must be run
   through aesimc (InvMixColumns).  Round key 0 is stored raw; after
   each key_expansion256 (which stores the two new keys and advances
   %rsi by 32), the just-stored pair at -32/-16(%rsi) is overwritten
   with its aesimc transform.  The final round key (stored raw at the
   end) and key 0 are deliberately NOT transformed — aesdeclast and the
   initial whitening use untransformed keys. */
.type intel_aes_decrypt_init_256,@function
.globl intel_aes_decrypt_init_256
.align 16
intel_aes_decrypt_init_256:
	movdqu (%rdi), %xmm1		/* round key 0: stored untransformed */
	movdqu 16(%rdi), %xmm3
	movdqu %xmm1, (%rsi)
	.byte 0x66,0x0f,0x38,0xdb,0xe3 /* aesimc %xmm3, %xmm4 */
	movdqu %xmm4, 16(%rsi)		/* round key 1: InvMixColumns form */
	leaq 32(%rsi), %rsi
	xor %eax, %eax			/* key_expansion256 requires %eax = 0 */

	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01 /* aeskeygenassist $0x01, %xmm3, %xmm2 */
	call key_expansion256
	/* replace the two keys just stored with their aesimc transforms */
	.byte 0x66,0x0f,0x38,0xdb,0xe1 /* aesimc %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdb,0xeb /* aesimc %xmm3, %xmm5 */
	movdqu %xmm4, -32(%rsi)
	movdqu %xmm5, -16(%rsi)
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02 /* aeskeygenassist $0x02, %xmm3, %xmm2 */
	call key_expansion256
	.byte 0x66,0x0f,0x38,0xdb,0xe1 /* aesimc %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdb,0xeb /* aesimc %xmm3, %xmm5 */
	movdqu %xmm4, -32(%rsi)
	movdqu %xmm5, -16(%rsi)
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04 /* aeskeygenassist $0x04, %xmm3, %xmm2 */
	call key_expansion256
	.byte 0x66,0x0f,0x38,0xdb,0xe1 /* aesimc %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdb,0xeb /* aesimc %xmm3, %xmm5 */
	movdqu %xmm4, -32(%rsi)
	movdqu %xmm5, -16(%rsi)
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08 /* aeskeygenassist $0x08, %xmm3, %xmm2 */
	call key_expansion256
	.byte 0x66,0x0f,0x38,0xdb,0xe1 /* aesimc %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdb,0xeb /* aesimc %xmm3, %xmm5 */
	movdqu %xmm4, -32(%rsi)
	movdqu %xmm5, -16(%rsi)
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10 /* aeskeygenassist $0x10, %xmm3, %xmm2 */
	call key_expansion256
	.byte 0x66,0x0f,0x38,0xdb,0xe1 /* aesimc %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdb,0xeb /* aesimc %xmm3, %xmm5 */
	movdqu %xmm4, -32(%rsi)
	movdqu %xmm5, -16(%rsi)
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20 /* aeskeygenassist $0x20, %xmm3, %xmm2 */
	call key_expansion256
	.byte 0x66,0x0f,0x38,0xdb,0xe1 /* aesimc %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdb,0xeb /* aesimc %xmm3, %xmm5 */
	movdqu %xmm4, -32(%rsi)
	movdqu %xmm5, -16(%rsi)
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40 /* aeskeygenassist $0x40, %xmm3, %xmm2 */
	/* final (15th) round key, expanded inline and stored raw */
	pxor %xmm6, %xmm6
	pshufd $0xff, %xmm2, %xmm2
	shufps $0x10, %xmm1, %xmm6
	pxor %xmm6, %xmm1
	shufps $0x8c, %xmm1, %xmm6
	pxor %xmm2, %xmm1
	pxor %xmm6, %xmm1
	movdqu %xmm1, (%rsi)

	ret
.size intel_aes_decrypt_init_256, .-intel_aes_decrypt_init_256
|
|
|
|
|
|
/* Internal helper: expand one AES-256 schedule step (two round keys).
   In:  %xmm1 = round key N-2, %xmm3 = round key N-1,
        %xmm2 = aeskeygenassist result for %xmm3, %eax = 0,
        %rsi = where to store the new keys.
   Out: %xmm1, %xmm3 = the two new round keys, stored at (%rsi) and
        16(%rsi); %rsi advanced by 32.  Clobbers %xmm4, %xmm6.
   The shufps $0x10 / pxor / shufps $0x8c / pxor sequence computes the
   prefix-xor of the previous key's 32-bit words (w0, w0^w1, w0^w1^w2,
   w0^w1^w2^w3) without loads; xoring in the broadcast keygenassist
   word completes the FIPS-197 recurrence. */
.type key_expansion256,@function
.align 16
key_expansion256:
	movd %eax, %xmm6		/* %eax is 0: zeroes %xmm6 */
	pshufd $0xff, %xmm2, %xmm2	/* broadcast SubWord(RotWord)^rcon */
	shufps $0x10, %xmm1, %xmm6
	pxor %xmm6, %xmm1
	shufps $0x8c, %xmm1, %xmm6
	pxor %xmm2, %xmm1
	pxor %xmm6, %xmm1		/* %xmm1 = even round key */
	movdqu %xmm1, (%rsi)

	addq $16, %rsi
	/* odd round key: same recurrence but with SubWord only (rcon 0),
	   taking word 2 (pshufd $0xaa) of the keygenassist result */
	.byte 0x66,0x0f,0x3a,0xdf,0xe1,0x00 /* aeskeygenassist $0, %xmm1, %xmm4 */
	pshufd $0xaa, %xmm4, %xmm4
	shufps $0x10, %xmm3, %xmm6
	pxor %xmm6, %xmm3
	shufps $0x8c, %xmm3, %xmm6
	pxor %xmm4, %xmm3
	pxor %xmm6, %xmm3		/* %xmm3 = odd round key */
	movdqu %xmm3, (%rsi)
	addq $16, %rsi
	ret
.size key_expansion256, .-key_expansion256
|
|
|
|
|
|
/* in %rdi : cx - context
|
|
in %rsi : output - pointer to output buffer
|
|
in %rdx : outputLen - pointer to variable for length of output
|
|
(filled by caller)
|
|
in %rcx : maxOutputLen - length of output buffer
|
|
in %r8 : input - pointer to input buffer
|
|
in %r9 : inputLen - length of input buffer
|
|
on stack: blocksize - AES blocksize (always 16, unused)
|
|
*/
|
|
/* AES-256 ECB encryption.  Bulk loop pipelines 8 independent blocks
   (%xmm3..%xmm10) through the 14 rounds, fetching each round key from
   memory via %r10 (offsets 16..208); remainder handled one block at a
   time with all keys cached in registers.  Only 16 XMM registers exist
   but AES-256 needs 15 round keys plus a work register, so the tail
   loop juggles %xmm8 between round key 0 (whitening) and round key 7
   (offset 112).  Expanded key at cx+48.  Returns 0 in %eax. */
.type intel_aes_encrypt_ecb_256,@function
.globl intel_aes_encrypt_ecb_256
.align 16
intel_aes_encrypt_ecb_256:
	// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi
	leaq 48(%rdi), %rdi		/* %rdi = &cx->expanded_key */

	movdqu (%rdi), %xmm2		/* key 0 (whitening) */
	movdqu 224(%rdi), %xmm15	/* key 14 (aesenclast) */
	xorl %eax, %eax			/* %rax = byte offset */
	// cmpq $8*16, %r9
	cmpq $128, %r9			/* fewer than 8 blocks? skip bulk loop */
	jb 1f
	// leaq -8*16(%r9), %r11
	leaq -128(%r9), %r11		/* last offset with 8 full blocks */
2:	movdqu (%r8, %rax), %xmm3	/* load 8 plaintext blocks */
	movdqu 16(%r8, %rax), %xmm4
	movdqu 32(%r8, %rax), %xmm5
	movdqu 48(%r8, %rax), %xmm6
	movdqu 64(%r8, %rax), %xmm7
	movdqu 80(%r8, %rax), %xmm8
	movdqu 96(%r8, %rax), %xmm9
	movdqu 112(%r8, %rax), %xmm10
	pxor %xmm2, %xmm3		/* whitening for all 8 blocks */
	pxor %xmm2, %xmm4
	pxor %xmm2, %xmm5
	pxor %xmm2, %xmm6
	pxor %xmm2, %xmm7
	pxor %xmm2, %xmm8
	pxor %xmm2, %xmm9
	pxor %xmm2, %xmm10
	movq $16, %r10			/* round keys 1..13 (offsets 16..208) */
3:	movdqu (%rdi, %r10), %xmm1
	.byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
	addq $16, %r10
	cmpq $224, %r10
	jne 3b
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xdf /* aesenclast %xmm15, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xe7 /* aesenclast %xmm15, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xef /* aesenclast %xmm15, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xf7 /* aesenclast %xmm15, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xff /* aesenclast %xmm15, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xdd,0xc7 /* aesenclast %xmm15, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xdd,0xcf /* aesenclast %xmm15, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xdd,0xd7 /* aesenclast %xmm15, %xmm10 */
	movdqu %xmm3, (%rsi, %rax)	/* store 8 ciphertext blocks */
	movdqu %xmm4, 16(%rsi, %rax)
	movdqu %xmm5, 32(%rsi, %rax)
	movdqu %xmm6, 48(%rsi, %rax)
	movdqu %xmm7, 64(%rsi, %rax)
	movdqu %xmm8, 80(%rsi, %rax)
	movdqu %xmm9, 96(%rsi, %rax)
	movdqu %xmm10, 112(%rsi, %rax)
	// addq $8*16, %rax
	addq $128, %rax
	cmpq %r11, %rax
	jbe 2b
1:	cmpq %rax, %r9			/* anything left over? */
	je 5f

	/* tail: keep 14 of the 15 round keys in registers; key 7
	   (offset 112) shares %xmm8 with key 0 and is reloaded per block */
	movdqu (%rdi), %xmm8		/* key 0 */
	movdqu 16(%rdi), %xmm2		/* keys 1..6 -> %xmm2..%xmm7 */
	movdqu 32(%rdi), %xmm3
	movdqu 48(%rdi), %xmm4
	movdqu 64(%rdi), %xmm5
	movdqu 80(%rdi), %xmm6
	movdqu 96(%rdi), %xmm7
	movdqu 128(%rdi), %xmm9		/* keys 8..13 -> %xmm9..%xmm14 */
	movdqu 144(%rdi), %xmm10
	movdqu 160(%rdi), %xmm11
	movdqu 176(%rdi), %xmm12
	movdqu 192(%rdi), %xmm13
	movdqu 208(%rdi), %xmm14

4:	movdqu (%r8, %rax), %xmm1
	pxor %xmm8, %xmm1		/* whitening (key 0) */
	movdqu 112(%rdi), %xmm8		/* now %xmm8 = key 7 */
	.byte 0x66,0x0f,0x38,0xdc,0xca /* aesenc %xmm2, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcb /* aesenc %xmm3, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcc /* aesenc %xmm4, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcd /* aesenc %xmm5, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xce /* aesenc %xmm6, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcf /* aesenc %xmm7, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */
	movdqu (%rdi), %xmm8		/* restore %xmm8 = key 0 for next block */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcc /* aesenc %xmm12, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcd /* aesenc %xmm13, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xce /* aesenc %xmm14, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xcf /* aesenclast %xmm15, %xmm1 */
	movdqu %xmm1, (%rsi, %rax)
	addq $16, %rax
	cmpq %rax, %r9
	jne 4b

5:	xor %eax, %eax
	ret
.size intel_aes_encrypt_ecb_256, .-intel_aes_encrypt_ecb_256
|
|
|
|
|
|
/* in %rdi : cx - context
|
|
in %rsi : output - pointer to output buffer
|
|
in %rdx : outputLen - pointer to variable for length of output
|
|
(filled by caller)
|
|
in %rcx : maxOutputLen - length of output buffer
|
|
in %r8 : input - pointer to input buffer
|
|
in %r9 : inputLen - length of input buffer
|
|
on stack: blocksize - AES blocksize (always 16, unused)
|
|
*/
|
|
/* AES-256 ECB decryption.  Mirror of intel_aes_encrypt_ecb_256: the
   bulk loop pipelines 8 blocks, walking round keys 13..1 downward via
   %r10 (offsets 208..16), with key 14 (%xmm15) applied first and key 0
   (%xmm2) via aesdeclast.  The tail loop caches keys 1..13 in
   %xmm2..%xmm14 and juggles %xmm8 between key 7 (offset 112) and key 0.
   Expanded key at cx+48.  Returns 0 in %eax. */
.type intel_aes_decrypt_ecb_256,@function
.globl intel_aes_decrypt_ecb_256
.align 16
intel_aes_decrypt_ecb_256:
	// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi
	leaq 48(%rdi), %rdi		/* %rdi = &cx->expanded_key */

	movdqu (%rdi), %xmm2		/* key 0 (aesdeclast) */
	movdqu 224(%rdi), %xmm15	/* key 14 (initial AddRoundKey) */
	xorl %eax, %eax			/* %rax = byte offset */
	// cmpq $8*16, %r9
	cmpq $128, %r9			/* fewer than 8 blocks? skip bulk loop */
	jb 1f
	// leaq -8*16(%r9), %r11
	leaq -128(%r9), %r11		/* last offset with 8 full blocks */
2:	movdqu (%r8, %rax), %xmm3	/* load 8 ciphertext blocks */
	movdqu 16(%r8, %rax), %xmm4
	movdqu 32(%r8, %rax), %xmm5
	movdqu 48(%r8, %rax), %xmm6
	movdqu 64(%r8, %rax), %xmm7
	movdqu 80(%r8, %rax), %xmm8
	movdqu 96(%r8, %rax), %xmm9
	movdqu 112(%r8, %rax), %xmm10
	pxor %xmm15, %xmm3		/* initial AddRoundKey for all 8 blocks */
	pxor %xmm15, %xmm4
	pxor %xmm15, %xmm5
	pxor %xmm15, %xmm6
	pxor %xmm15, %xmm7
	pxor %xmm15, %xmm8
	pxor %xmm15, %xmm9
	pxor %xmm15, %xmm10
	movq $208, %r10			/* walk keys 13..1 (offsets 208..16) */
3:	movdqu (%rdi, %r10), %xmm1
	.byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
	subq $16, %r10
	jne 3b
	.byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */
	movdqu %xmm3, (%rsi, %rax)	/* store 8 plaintext blocks */
	movdqu %xmm4, 16(%rsi, %rax)
	movdqu %xmm5, 32(%rsi, %rax)
	movdqu %xmm6, 48(%rsi, %rax)
	movdqu %xmm7, 64(%rsi, %rax)
	movdqu %xmm8, 80(%rsi, %rax)
	movdqu %xmm9, 96(%rsi, %rax)
	movdqu %xmm10, 112(%rsi, %rax)
	// addq $8*16, %rax
	addq $128, %rax
	cmpq %r11, %rax
	jbe 2b
1:	cmpq %rax, %r9			/* anything left over? */
	je 5f

	movdqu 16(%rdi), %xmm2		/* keys 1..13 -> %xmm2..%xmm14 */
	movdqu 32(%rdi), %xmm3
	movdqu 48(%rdi), %xmm4
	movdqu 64(%rdi), %xmm5
	movdqu 80(%rdi), %xmm6
	movdqu 96(%rdi), %xmm7
	movdqu 112(%rdi), %xmm8
	movdqu 128(%rdi), %xmm9
	movdqu 144(%rdi), %xmm10
	movdqu 160(%rdi), %xmm11
	movdqu 176(%rdi), %xmm12
	movdqu 192(%rdi), %xmm13
	movdqu 208(%rdi), %xmm14

4:	movdqu (%r8, %rax), %xmm1	/* single-block tail loop */
	pxor %xmm15, %xmm1		/* initial AddRoundKey (key 14) */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xce /* aesdec %xmm14, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xcd /* aesdec %xmm13, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xcc /* aesdec %xmm12, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */
	movdqu (%rdi), %xmm8		/* %xmm8 done as key 7; load key 0 */
	.byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xca /* aesdec %xmm2, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdf,0xc8 /* aesdeclast %xmm8, %xmm1 */
	movdqu 112(%rdi), %xmm8		/* restore %xmm8 = key 7 for next block */
	movdqu %xmm1, (%rsi, %rax)
	addq $16, %rax
	cmpq %rax, %r9
	jne 4b

5:	xor %eax, %eax
	ret
.size intel_aes_decrypt_ecb_256, .-intel_aes_decrypt_ecb_256
|
|
|
|
|
|
/* in %rdi : cx - context
|
|
in %rsi : output - pointer to output buffer
|
|
in %rdx : outputLen - pointer to variable for length of output
|
|
(filled by caller)
|
|
in %rcx : maxOutputLen - length of output buffer
|
|
in %r8 : input - pointer to input buffer
|
|
in %r9 : inputLen - length of input buffer
|
|
on stack: blocksize - AES blocksize (always 16, unused)
|
|
*/
|
|
/* AES-256 CBC encryption, one block at a time (serial by construction).
   Context layout: IV at cx+16, expanded key at cx+48.  AES-256 needs
   15 round keys but only 16 XMM registers exist and %xmm0/%xmm1 are
   the chain value and work block, so %xmm8 is shared between round
   key 0 (whitening) and round key 7 (offset 112), reloaded per block.
   Returns 0 in %eax. */
.type intel_aes_encrypt_cbc_256,@function
.globl intel_aes_encrypt_cbc_256
.align 16
intel_aes_encrypt_cbc_256:
	testq %r9, %r9			/* zero-length input: nothing to do */
	je 2f

	// leaq IV_OFFSET(%rdi), %rdx
	// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi
	leaq 16(%rdi), %rdx		/* %rdx = &cx->iv */
	leaq 48(%rdi), %rdi		/* %rdi = &cx->expanded_key */

	movdqu (%rdx), %xmm0		/* %xmm0 = IV / running chain value */
	movdqu (%rdi), %xmm8		/* key 0 (shared reg, see header) */
	movdqu 16(%rdi), %xmm2		/* keys 1..6 -> %xmm2..%xmm7 */
	movdqu 32(%rdi), %xmm3
	movdqu 48(%rdi), %xmm4
	movdqu 64(%rdi), %xmm5
	movdqu 80(%rdi), %xmm6
	movdqu 96(%rdi), %xmm7
	movdqu 128(%rdi), %xmm9		/* keys 8..14 -> %xmm9..%xmm15 */
	movdqu 144(%rdi), %xmm10
	movdqu 160(%rdi), %xmm11
	movdqu 176(%rdi), %xmm12
	movdqu 192(%rdi), %xmm13
	movdqu 208(%rdi), %xmm14
	movdqu 224(%rdi), %xmm15

	xorl %eax, %eax			/* %rax = byte offset */
1:	movdqu (%r8, %rax), %xmm1	/* load plaintext block */
	pxor %xmm0, %xmm1		/* CBC: xor with previous ciphertext/IV */
	pxor %xmm8, %xmm1		/* whitening (key 0) */
	movdqu 112(%rdi), %xmm8		/* now %xmm8 = key 7 */
	.byte 0x66,0x0f,0x38,0xdc,0xca /* aesenc %xmm2, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcb /* aesenc %xmm3, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcc /* aesenc %xmm4, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcd /* aesenc %xmm5, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xce /* aesenc %xmm6, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcf /* aesenc %xmm7, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */
	movdqu (%rdi), %xmm8		/* restore %xmm8 = key 0 for next block */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcc /* aesenc %xmm12, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcd /* aesenc %xmm13, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xce /* aesenc %xmm14, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xcf /* aesenclast %xmm15, %xmm1 */
	movdqu %xmm1, (%rsi, %rax)	/* store ciphertext block */
	movdqa %xmm1, %xmm0		/* ciphertext becomes next chain value */
	addq $16, %rax
	cmpq %rax, %r9
	jne 1b

	movdqu %xmm0, (%rdx)		/* write final chain value back as IV */

2:	xor %eax, %eax
	ret
.size intel_aes_encrypt_cbc_256, .-intel_aes_encrypt_cbc_256
|
|
|
|
|
|
/* in %rdi : cx - context
|
|
in %rsi : output - pointer to output buffer
|
|
in %rdx : outputLen - pointer to variable for length of output
|
|
(filled by caller)
|
|
in %rcx : maxOutputLen - length of output buffer
|
|
in %r8 : input - pointer to input buffer
|
|
in %r9 : inputLen - length of input buffer
|
|
on stack: blocksize - AES blocksize (always 16, unused)
|
|
*/
|
|
/* AES-256 CBC decryption.  Bulk loop pipelines 8 independent blocks
   (%xmm3..%xmm10) through the 14 rounds, walking round keys 13..1
   downward via %r10 (offsets 208..16); key 14 (%xmm15) is applied
   first, key 0 (%xmm2) via aesdeclast.  CBC un-chaining re-reads the
   previous ciphertext blocks from the input buffer.  The tail loop
   caches keys 1..13 in %xmm2..%xmm14 and juggles %xmm8 between key 7
   (offset 112) and key 0; it re-fetches the ciphertext block before
   storing plaintext so in-place operation (input buf == output buf)
   stays correct.  Context: IV at cx+16, key at cx+48.  Returns 0. */
.type intel_aes_decrypt_cbc_256,@function
.globl intel_aes_decrypt_cbc_256
.align 16
intel_aes_decrypt_cbc_256:
	// leaq IV_OFFSET(%rdi), %rdx
	// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi
	leaq 16(%rdi), %rdx		/* %rdx = &cx->iv */
	leaq 48(%rdi), %rdi		/* %rdi = &cx->expanded_key */

	movdqu (%rdx), %xmm0		/* %xmm0 = IV / previous ciphertext */
	movdqu (%rdi), %xmm2		/* key 0 (aesdeclast) */
	movdqu 224(%rdi), %xmm15	/* key 14 (initial AddRoundKey) */
	xorl %eax, %eax			/* %rax = byte offset */
	// cmpq $8*16, %r9
	cmpq $128, %r9			/* fewer than 8 blocks? skip bulk loop */
	jb 1f
	// leaq -8*16(%r9), %r11
	leaq -128(%r9), %r11		/* last offset with 8 full blocks */
2:	movdqu (%r8, %rax), %xmm3	/* load 8 ciphertext blocks */
	movdqu 16(%r8, %rax), %xmm4
	movdqu 32(%r8, %rax), %xmm5
	movdqu 48(%r8, %rax), %xmm6
	movdqu 64(%r8, %rax), %xmm7
	movdqu 80(%r8, %rax), %xmm8
	movdqu 96(%r8, %rax), %xmm9
	movdqu 112(%r8, %rax), %xmm10
	pxor %xmm15, %xmm3		/* initial AddRoundKey for all 8 blocks */
	pxor %xmm15, %xmm4
	pxor %xmm15, %xmm5
	pxor %xmm15, %xmm6
	pxor %xmm15, %xmm7
	pxor %xmm15, %xmm8
	pxor %xmm15, %xmm9
	pxor %xmm15, %xmm10
	movq $208, %r10			/* walk keys 13..1 (offsets 208..16) */
3:	movdqu (%rdi, %r10), %xmm1
	.byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
	subq $16, %r10
	jne 3b
	.byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */
	/* CBC un-chaining: xor each decrypted block with the previous
	   ciphertext block (re-read from input before any store below) */
	pxor %xmm0, %xmm3
	movdqu (%r8, %rax), %xmm0
	pxor %xmm0, %xmm4
	movdqu 16(%r8, %rax), %xmm0
	pxor %xmm0, %xmm5
	movdqu 32(%r8, %rax), %xmm0
	pxor %xmm0, %xmm6
	movdqu 48(%r8, %rax), %xmm0
	pxor %xmm0, %xmm7
	movdqu 64(%r8, %rax), %xmm0
	pxor %xmm0, %xmm8
	movdqu 80(%r8, %rax), %xmm0
	pxor %xmm0, %xmm9
	movdqu 96(%r8, %rax), %xmm0
	pxor %xmm0, %xmm10
	movdqu 112(%r8, %rax), %xmm0	/* last ciphertext = next chain value */
	movdqu %xmm3, (%rsi, %rax)	/* store 8 plaintext blocks */
	movdqu %xmm4, 16(%rsi, %rax)
	movdqu %xmm5, 32(%rsi, %rax)
	movdqu %xmm6, 48(%rsi, %rax)
	movdqu %xmm7, 64(%rsi, %rax)
	movdqu %xmm8, 80(%rsi, %rax)
	movdqu %xmm9, 96(%rsi, %rax)
	movdqu %xmm10, 112(%rsi, %rax)
	// addq $8*16, %rax
	addq $128, %rax
	cmpq %r11, %rax
	jbe 2b
1:	cmpq %rax, %r9			/* anything left over? */
	je 5f

	movdqu 16(%rdi), %xmm2		/* keys 1..13 -> %xmm2..%xmm14 */
	movdqu 32(%rdi), %xmm3
	movdqu 48(%rdi), %xmm4
	movdqu 64(%rdi), %xmm5
	movdqu 80(%rdi), %xmm6
	movdqu 96(%rdi), %xmm7
	movdqu 112(%rdi), %xmm8
	movdqu 128(%rdi), %xmm9
	movdqu 144(%rdi), %xmm10
	movdqu 160(%rdi), %xmm11
	movdqu 176(%rdi), %xmm12
	movdqu 192(%rdi), %xmm13
	movdqu 208(%rdi), %xmm14

4:	movdqu (%r8, %rax), %xmm1	/* single-block tail loop */
	pxor %xmm15, %xmm1		/* initial AddRoundKey (key 14) */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xce /* aesdec %xmm14, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xcd /* aesdec %xmm13, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xcc /* aesdec %xmm12, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */
	movdqu (%rdi), %xmm8		/* %xmm8 done as key 7; load key 0 */
	.byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xca /* aesdec %xmm2, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdf,0xc8 /* aesdeclast %xmm8, %xmm1 */
	movdqu 112(%rdi), %xmm8		/* restore %xmm8 = key 7 for next block */
	pxor %xmm0, %xmm1		/* un-chain with previous ciphertext */
	movdqu (%r8, %rax), %xmm0 /* fetch the IV before we store the block */
	movdqu %xmm1, (%rsi, %rax) /* in case input buf = output buf */
	addq $16, %rax
	cmpq %rax, %r9
	jne 4b

5:	movdqu %xmm0, (%rdx)		/* write final chain value back as IV */

	xor %eax, %eax
	ret
.size intel_aes_decrypt_cbc_256, .-intel_aes_decrypt_cbc_256
|