gecko/security/nss/lib/freebl/intel-aes.s
Wan-Teh Chang c69ac544dc Bug 673382: Update NSS to NSS_3_12_11_BETA1, which includes the fixes for
Bug 661609, Bug 650276 (code not used by Mozilla), Bug 602509, Bug 655411,
Bug 655850, Bug 671711, Bug 617565, Bug 668001, Bug 346583, Bug 661061.
2011-07-27 09:07:32 -07:00

1668 lines
54 KiB
ArmAsm

/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Network Security Services.
*
* The Initial Developer of the Original Code is
* Red Hat Inc.
* Portions created by the Initial Developer are Copyright (C) 2009
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Ulrich Drepper <drepper@redhat.com>
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
.text
#define IV_OFFSET 16
#define EXPANDED_KEY_OFFSET 48
/* in %rdi : the key
in %rsi : buffer for expanded key
*/
.type intel_aes_encrypt_init_128,@function
.globl intel_aes_encrypt_init_128
.align 16
intel_aes_encrypt_init_128:
movups (%rdi), %xmm1
movups %xmm1, (%rsi)
leaq 16(%rsi), %rsi
xorl %eax, %eax
.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x01 /* aeskeygenassist $0x01, %xmm1, %xmm2 */
call key_expansion128
.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x02 /* aeskeygenassist $0x02, %xmm1, %xmm2 */
call key_expansion128
.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x04 /* aeskeygenassist $0x04, %xmm1, %xmm2 */
call key_expansion128
.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x08 /* aeskeygenassist $0x08, %xmm1, %xmm2 */
call key_expansion128
.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x10 /* aeskeygenassist $0x10, %xmm1, %xmm2 */
call key_expansion128
.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x20 /* aeskeygenassist $0x20, %xmm1, %xmm2 */
call key_expansion128
.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x40 /* aeskeygenassist $0x40, %xmm1, %xmm2 */
call key_expansion128
.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x80 /* aeskeygenassist $0x80, %xmm1, %xmm2 */
call key_expansion128
.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x1b /* aeskeygenassist $0x1b, %xmm1, %xmm2 */
call key_expansion128
.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x36 /* aeskeygenassist $0x36, %xmm1, %xmm2 */
call key_expansion128
ret
.size intel_aes_encrypt_init_128, .-intel_aes_encrypt_init_128
/* in %rdi : the key
in %rsi : buffer for expanded key
*/
.type intel_aes_decrypt_init_128,@function
.globl intel_aes_decrypt_init_128
.align 16
intel_aes_decrypt_init_128:
movups (%rdi), %xmm1
movups %xmm1, (%rsi)
leaq 16(%rsi), %rsi
xorl %eax, %eax
.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x01 /* aeskeygenassist $0x01, %xmm1, %xmm2 */
call key_expansion128
.byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
movups %xmm2, -16(%rsi)
.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x02 /* aeskeygenassist $0x02, %xmm1, %xmm2 */
call key_expansion128
.byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
movups %xmm2, -16(%rsi)
.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x04 /* aeskeygenassist $0x04, %xmm1, %xmm2 */
call key_expansion128
.byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
movups %xmm2, -16(%rsi)
.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x08 /* aeskeygenassist $0x08, %xmm1, %xmm2 */
call key_expansion128
.byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
movups %xmm2, -16(%rsi)
.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x10 /* aeskeygenassist $0x10, %xmm1, %xmm2 */
call key_expansion128
.byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
movups %xmm2, -16(%rsi)
.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x20 /* aeskeygenassist $0x20, %xmm1, %xmm2 */
call key_expansion128
.byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
movups %xmm2, -16(%rsi)
.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x40 /* aeskeygenassist $0x40, %xmm1, %xmm2 */
call key_expansion128
.byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
movups %xmm2, -16(%rsi)
.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x80 /* aeskeygenassist $0x80, %xmm1, %xmm2 */
call key_expansion128
.byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
movups %xmm2, -16(%rsi)
.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x1b /* aeskeygenassist $0x1b, %xmm1, %xmm2 */
call key_expansion128
.byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
movups %xmm2, -16(%rsi)
.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x36 /* aeskeygenassist $0x36, %xmm1, %xmm2 */
call key_expansion128
ret
.size intel_aes_decrypt_init_128, .-intel_aes_decrypt_init_128
.type key_expansion128,@function
.align 16
key_expansion128:
movd %eax, %xmm3
pshufd $0xff, %xmm2, %xmm2
shufps $0x10, %xmm1, %xmm3
pxor %xmm3, %xmm1
shufps $0x8c, %xmm1, %xmm3
pxor %xmm2, %xmm1
pxor %xmm3, %xmm1
movdqu %xmm1, (%rsi)
addq $16, %rsi
ret
.size key_expansion128, .-key_expansion128
/* in %rdi : cx - context
in %rsi : output - pointer to output buffer
in %rdx : outputLen - pointer to variable for length of output
(filled by caller)
in %rcx : maxOutputLen - length of output buffer
in %r8 : input - pointer to input buffer
in %r9 : inputLen - length of input buffer
on stack: blocksize - AES blocksize (always 16, unused)
*/
.type intel_aes_encrypt_ecb_128,@function
.globl intel_aes_encrypt_ecb_128
.align 16
intel_aes_encrypt_ecb_128:
// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi
leaq 48(%rdi), %rdi
movdqu (%rdi), %xmm2
movdqu 160(%rdi), %xmm12
xor %eax, %eax
// cmpq $8*16, %r9
cmpq $128, %r9
jb 1f
// leaq -8*16(%r9), %r11
leaq -128(%r9), %r11
2: movdqu (%r8, %rax), %xmm3
movdqu 16(%r8, %rax), %xmm4
movdqu 32(%r8, %rax), %xmm5
movdqu 48(%r8, %rax), %xmm6
movdqu 64(%r8, %rax), %xmm7
movdqu 80(%r8, %rax), %xmm8
movdqu 96(%r8, %rax), %xmm9
movdqu 112(%r8, %rax), %xmm10
pxor %xmm2, %xmm3
pxor %xmm2, %xmm4
pxor %xmm2, %xmm5
pxor %xmm2, %xmm6
pxor %xmm2, %xmm7
pxor %xmm2, %xmm8
pxor %xmm2, %xmm9
pxor %xmm2, %xmm10
movq $16, %r10
3: movdqu (%rdi, %r10), %xmm1
.byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
.byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
.byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
.byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
.byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
addq $16, %r10
cmpq $160, %r10
jne 3b
.byte 0x66,0x41,0x0f,0x38,0xdd,0xdc /* aesenclast %xmm12, %xmm3 */
.byte 0x66,0x41,0x0f,0x38,0xdd,0xe4 /* aesenclast %xmm12, %xmm4 */
.byte 0x66,0x41,0x0f,0x38,0xdd,0xec /* aesenclast %xmm12, %xmm5 */
.byte 0x66,0x41,0x0f,0x38,0xdd,0xf4 /* aesenclast %xmm12, %xmm6 */
.byte 0x66,0x41,0x0f,0x38,0xdd,0xfc /* aesenclast %xmm12, %xmm7 */
.byte 0x66,0x45,0x0f,0x38,0xdd,0xc4 /* aesenclast %xmm12, %xmm8 */
.byte 0x66,0x45,0x0f,0x38,0xdd,0xcc /* aesenclast %xmm12, %xmm9 */
.byte 0x66,0x45,0x0f,0x38,0xdd,0xd4 /* aesenclast %xmm12, %xmm10 */
movdqu %xmm3, (%rsi, %rax)
movdqu %xmm4, 16(%rsi, %rax)
movdqu %xmm5, 32(%rsi, %rax)
movdqu %xmm6, 48(%rsi, %rax)
movdqu %xmm7, 64(%rsi, %rax)
movdqu %xmm8, 80(%rsi, %rax)
movdqu %xmm9, 96(%rsi, %rax)
movdqu %xmm10, 112(%rsi, %rax)
// addq $8*16, %rax
addq $128, %rax
cmpq %r11, %rax
jbe 2b
1: cmpq %rax, %r9
je 5f
movdqu 16(%rdi), %xmm3
movdqu 32(%rdi), %xmm4
movdqu 48(%rdi), %xmm5
movdqu 64(%rdi), %xmm6
movdqu 80(%rdi), %xmm7
movdqu 96(%rdi), %xmm8
movdqu 112(%rdi), %xmm9
movdqu 128(%rdi), %xmm10
movdqu 144(%rdi), %xmm11
4: movdqu (%r8, %rax), %xmm1
pxor %xmm2, %xmm1
.byte 0x66,0x0f,0x38,0xdc,0xcb /* aesenc %xmm3, %xmm1 */
.byte 0x66,0x0f,0x38,0xdc,0xcc /* aesenc %xmm4, %xmm1 */
.byte 0x66,0x0f,0x38,0xdc,0xcd /* aesenc %xmm5, %xmm1 */
.byte 0x66,0x0f,0x38,0xdc,0xce /* aesenc %xmm6, %xmm1 */
.byte 0x66,0x0f,0x38,0xdc,0xcf /* aesenc %xmm7, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xdd,0xcc /* aesenclast %xmm12, %xmm1 */
movdqu %xmm1, (%rsi, %rax)
addq $16, %rax
cmpq %rax, %r9
jne 4b
5: xor %eax, %eax
ret
.size intel_aes_encrypt_ecb_128, .-intel_aes_encrypt_ecb_128
/* in %rdi : cx - context
in %rsi : output - pointer to output buffer
in %rdx : outputLen - pointer to variable for length of output
(filled by caller)
in %rcx : maxOutputLen - length of output buffer
in %r8 : input - pointer to input buffer
in %r9 : inputLen - length of input buffer
on stack: blocksize - AES blocksize (always 16, unused)
*/
.type intel_aes_decrypt_ecb_128,@function
.globl intel_aes_decrypt_ecb_128
.align 16
intel_aes_decrypt_ecb_128:
// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi
leaq 48(%rdi), %rdi
movdqu (%rdi), %xmm2
movdqu 160(%rdi), %xmm12
xorl %eax, %eax
// cmpq $8*16, %r9
cmpq $128, %r9
jb 1f
// leaq -8*16(%r9), %r11
leaq -128(%r9), %r11
2: movdqu (%r8, %rax), %xmm3
movdqu 16(%r8, %rax), %xmm4
movdqu 32(%r8, %rax), %xmm5
movdqu 48(%r8, %rax), %xmm6
movdqu 64(%r8, %rax), %xmm7
movdqu 80(%r8, %rax), %xmm8
movdqu 96(%r8, %rax), %xmm9
movdqu 112(%r8, %rax), %xmm10
pxor %xmm12, %xmm3
pxor %xmm12, %xmm4
pxor %xmm12, %xmm5
pxor %xmm12, %xmm6
pxor %xmm12, %xmm7
pxor %xmm12, %xmm8
pxor %xmm12, %xmm9
pxor %xmm12, %xmm10
movq $144, %r10
3: movdqu (%rdi, %r10), %xmm1
.byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
.byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
.byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
.byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
.byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
.byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
.byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm8 */
.byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm8 */
subq $16, %r10
jne 3b
.byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */
.byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm3 */
.byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm3 */
.byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm3 */
.byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm3 */
.byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */
.byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */
.byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */
movdqu %xmm3, (%rsi, %rax)
movdqu %xmm4, 16(%rsi, %rax)
movdqu %xmm5, 32(%rsi, %rax)
movdqu %xmm6, 48(%rsi, %rax)
movdqu %xmm7, 64(%rsi, %rax)
movdqu %xmm8, 80(%rsi, %rax)
movdqu %xmm9, 96(%rsi, %rax)
movdqu %xmm10, 112(%rsi, %rax)
// addq $8*16, %rax
addq $128, %rax
cmpq %r11, %rax
jbe 2b
1: cmpq %rax, %r9
je 5f
movdqu 16(%rdi), %xmm3
movdqu 32(%rdi), %xmm4
movdqu 48(%rdi), %xmm5
movdqu 64(%rdi), %xmm6
movdqu 80(%rdi), %xmm7
movdqu 96(%rdi), %xmm8
movdqu 112(%rdi), %xmm9
movdqu 128(%rdi), %xmm10
movdqu 144(%rdi), %xmm11
4: movdqu (%r8, %rax), %xmm1
pxor %xmm12, %xmm1
.byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */
.byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */
.byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm7, %xmm1 */
.byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm7, %xmm1 */
.byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm7, %xmm1 */
.byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm7, %xmm1 */
.byte 0x66,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm1 */
movdqu %xmm1, (%rsi, %rax)
addq $16, %rax
cmpq %rax, %r9
jne 4b
5: xor %eax, %eax
ret
.size intel_aes_decrypt_ecb_128, .-intel_aes_decrypt_ecb_128
/* in %rdi : cx - context
in %rsi : output - pointer to output buffer
in %rdx : outputLen - pointer to variable for length of output
(filled by caller)
in %rcx : maxOutputLen - length of output buffer
in %r8 : input - pointer to input buffer
in %r9 : inputLen - length of input buffer
on stack: blocksize - AES blocksize (always 16, unused)
*/
.type intel_aes_encrypt_cbc_128,@function
.globl intel_aes_encrypt_cbc_128
.align 16
intel_aes_encrypt_cbc_128:
testq %r9, %r9
je 2f
// leaq IV_OFFSET(%rdi), %rdx
// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi
leaq 16(%rdi), %rdx
leaq 48(%rdi), %rdi
movdqu (%rdx), %xmm0
movdqu (%rdi), %xmm2
movdqu 16(%rdi), %xmm3
movdqu 32(%rdi), %xmm4
movdqu 48(%rdi), %xmm5
movdqu 64(%rdi), %xmm6
movdqu 80(%rdi), %xmm7
movdqu 96(%rdi), %xmm8
movdqu 112(%rdi), %xmm9
movdqu 128(%rdi), %xmm10
movdqu 144(%rdi), %xmm11
movdqu 160(%rdi), %xmm12
xorl %eax, %eax
1: movdqu (%r8, %rax), %xmm1
pxor %xmm0, %xmm1
pxor %xmm2, %xmm1
.byte 0x66,0x0f,0x38,0xdc,0xcb /* aesenc %xmm3, %xmm1 */
.byte 0x66,0x0f,0x38,0xdc,0xcc /* aesenc %xmm4, %xmm1 */
.byte 0x66,0x0f,0x38,0xdc,0xcd /* aesenc %xmm5, %xmm1 */
.byte 0x66,0x0f,0x38,0xdc,0xce /* aesenc %xmm6, %xmm1 */
.byte 0x66,0x0f,0x38,0xdc,0xcf /* aesenc %xmm7, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmma, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmmb, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xdd,0xcc /* aesenclast %xmm12, %xmm1 */
movdqu %xmm1, (%rsi, %rax)
movdqa %xmm1, %xmm0
addq $16, %rax
cmpq %rax, %r9
jne 1b
movdqu %xmm0, (%rdx)
2: xor %eax, %eax
ret
.size intel_aes_encrypt_cbc_128, .-intel_aes_encrypt_cbc_128
/* in %rdi : cx - context
in %rsi : output - pointer to output buffer
in %rdx : outputLen - pointer to variable for length of output
(filled by caller)
in %rcx : maxOutputLen - length of output buffer
in %r8 : input - pointer to input buffer
in %r9 : inputLen - length of input buffer
on stack: blocksize - AES blocksize (always 16, unused)
*/
.type intel_aes_decrypt_cbc_128,@function
.globl intel_aes_decrypt_cbc_128
.align 16
intel_aes_decrypt_cbc_128:
leaq 16(%rdi), %rdx /* iv */
leaq 48(%rdi), %rdi /* expanded key */
movdqu (%rdx), %xmm0 /* iv */
movdqu (%rdi), %xmm2 /* first key block */
movdqu 160(%rdi), %xmm12 /* last key block */
xorl %eax, %eax
cmpq $128, %r9
jb 1f
leaq -128(%r9), %r11
2: movdqu (%r8, %rax), %xmm3 /* 1st data block */
movdqu 16(%r8, %rax), %xmm4 /* 2d data block */
movdqu 32(%r8, %rax), %xmm5
movdqu 48(%r8, %rax), %xmm6
movdqu 64(%r8, %rax), %xmm7
movdqu 80(%r8, %rax), %xmm8
movdqu 96(%r8, %rax), %xmm9
movdqu 112(%r8, %rax), %xmm10
pxor %xmm12, %xmm3
pxor %xmm12, %xmm4
pxor %xmm12, %xmm5
pxor %xmm12, %xmm6
pxor %xmm12, %xmm7
pxor %xmm12, %xmm8
pxor %xmm12, %xmm9
pxor %xmm12, %xmm10
movq $144, %r10
3: movdqu (%rdi, %r10), %xmm1 /* n-th block of the key */
.byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
.byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
.byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
.byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
.byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
.byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
.byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
.byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
subq $16, %r10
jne 3b
.byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */
.byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */
.byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */
.byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */
.byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */
.byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */
.byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */
.byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */
pxor %xmm0, %xmm3
movdqu (%r8, %rax), %xmm0
pxor %xmm0, %xmm4
movdqu 16(%r8, %rax), %xmm0
pxor %xmm0, %xmm5
movdqu 32(%r8, %rax), %xmm0
pxor %xmm0, %xmm6
movdqu 48(%r8, %rax), %xmm0
pxor %xmm0, %xmm7
movdqu 64(%r8, %rax), %xmm0
pxor %xmm0, %xmm8
movdqu 80(%r8, %rax), %xmm0
pxor %xmm0, %xmm9
movdqu 96(%r8, %rax), %xmm0
pxor %xmm0, %xmm10
movdqu 112(%r8, %rax), %xmm0
movdqu %xmm3, (%rsi, %rax)
movdqu %xmm4, 16(%rsi, %rax)
movdqu %xmm5, 32(%rsi, %rax)
movdqu %xmm6, 48(%rsi, %rax)
movdqu %xmm7, 64(%rsi, %rax)
movdqu %xmm8, 80(%rsi, %rax)
movdqu %xmm9, 96(%rsi, %rax)
movdqu %xmm10, 112(%rsi, %rax)
addq $128, %rax
cmpq %r11, %rax
jbe 2b
1: cmpq %rax, %r9
je 5f
movdqu 16(%rdi), %xmm3
movdqu 32(%rdi), %xmm4
movdqu 48(%rdi), %xmm5
movdqu 64(%rdi), %xmm6
movdqu 80(%rdi), %xmm7
movdqu 96(%rdi), %xmm8
movdqu 112(%rdi), %xmm9
movdqu 128(%rdi), %xmm10
movdqu 144(%rdi), %xmm11
4: movdqu (%r8, %rax), %xmm1
movdqa %xmm1, %xmm13
pxor %xmm12, %xmm1
.byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */
.byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */
.byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */
.byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */
.byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */
.byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */
.byte 0x66,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm1 */
pxor %xmm0, %xmm1
movdqu %xmm1, (%rsi, %rax)
movdqa %xmm13, %xmm0
addq $16, %rax
cmpq %rax, %r9
jne 4b
5: movdqu %xmm0, (%rdx)
xor %eax, %eax
ret
.size intel_aes_decrypt_cbc_128, .-intel_aes_decrypt_cbc_128
/* in %rdi : the key
in %rsi : buffer for expanded key
*/
.type intel_aes_encrypt_init_192,@function
.globl intel_aes_encrypt_init_192
.align 16
intel_aes_encrypt_init_192:
movdqu (%rdi), %xmm1
movq 16(%rdi), %xmm3
movdqu %xmm1, (%rsi)
movq %xmm3, 16(%rsi)
leaq 24(%rsi), %rsi
.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01 /* aeskeygenassist $0x01, %xmm3, %xmm2 */
call key_expansion192
.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02 /* aeskeygenassist $0x02, %xmm3, %xmm2 */
call key_expansion192
.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04 /* aeskeygenassist $0x04, %xmm3, %xmm2 */
call key_expansion192
.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08 /* aeskeygenassist $0x08, %xmm3, %xmm2 */
call key_expansion192
.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10 /* aeskeygenassist $0x10, %xmm3, %xmm2 */
call key_expansion192
.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20 /* aeskeygenassist $0x20, %xmm3, %xmm2 */
call key_expansion192
.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40 /* aeskeygenassist $0x40, %xmm3, %xmm2 */
call key_expansion192
.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x80 /* aeskeygenassist $0x80, %xmm3, %xmm2 */
call key_expansion192
ret
.size intel_aes_encrypt_init_192, .-intel_aes_encrypt_init_192
/* in %rdi : the key
in %rsi : buffer for expanded key
*/
.type intel_aes_decrypt_init_192,@function
.globl intel_aes_decrypt_init_192
.align 16
intel_aes_decrypt_init_192:
movdqu (%rdi), %xmm1
movq 16(%rdi), %xmm3
movdqu %xmm1, (%rsi)
movq %xmm3, 16(%rsi)
leaq 24(%rsi), %rsi
.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01 /* aeskeygenassist $0x01, %xmm3, %xmm2 */
call key_expansion192
movups -32(%rsi), %xmm2
movups -16(%rsi), %xmm4
.byte 0x66,0x0f,0x38,0xdb,0xd2 /* aesimc %xmm2, %xmm2 */
.byte 0x66,0x0f,0x38,0xdb,0xe4 /* aesimc %xmm4, %xmm4 */
movups %xmm2, -32(%rsi)
movups %xmm4, -16(%rsi)
.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02 /* aeskeygenassist $0x02, %xmm3, %xmm2 */
call key_expansion192
.byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
movups %xmm2, -24(%rsi)
.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04 /* aeskeygenassist $0x04, %xmm3, %xmm2 */
call key_expansion192
movups -32(%rsi), %xmm2
movups -16(%rsi), %xmm4
.byte 0x66,0x0f,0x38,0xdb,0xd2 /* aesimc %xmm2, %xmm2 */
.byte 0x66,0x0f,0x38,0xdb,0xe4 /* aesimc %xmm4, %xmm4 */
movups %xmm2, -32(%rsi)
movups %xmm4, -16(%rsi)
.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08 /* aeskeygenassist $0x08, %xmm3, %xmm2 */
call key_expansion192
.byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
movups %xmm2, -24(%rsi)
.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10 /* aeskeygenassist $0x10, %xmm3, %xmm2 */
call key_expansion192
movups -32(%rsi), %xmm2
movups -16(%rsi), %xmm4
.byte 0x66,0x0f,0x38,0xdb,0xd2 /* aesimc %xmm2, %xmm2 */
.byte 0x66,0x0f,0x38,0xdb,0xe4 /* aesimc %xmm4, %xmm4 */
movups %xmm2, -32(%rsi)
movups %xmm4, -16(%rsi)
.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20 /* aeskeygenassist $0x20, %xmm3, %xmm2 */
call key_expansion192
.byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
movups %xmm2, -24(%rsi)
.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40 /* aeskeygenassist $0x40, %xmm3, %xmm2 */
call key_expansion192
movups -32(%rsi), %xmm2
movups -16(%rsi), %xmm4
.byte 0x66,0x0f,0x38,0xdb,0xd2 /* aesimc %xmm2, %xmm2 */
.byte 0x66,0x0f,0x38,0xdb,0xe4 /* aesimc %xmm4, %xmm4 */
movups %xmm2, -32(%rsi)
movups %xmm4, -16(%rsi)
.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x80 /* aeskeygenassist $0x80, %xmm3, %xmm2 */
call key_expansion192
ret
.size intel_aes_decrypt_init_192, .-intel_aes_decrypt_init_192
.type key_expansion192,@function
.align 16
key_expansion192:
pshufd $0x55, %xmm2, %xmm2
xor %eax, %eax
movd %eax, %xmm4
shufps $0x10, %xmm1, %xmm4
pxor %xmm4, %xmm1
shufps $0x8c, %xmm1, %xmm4
pxor %xmm2, %xmm1
pxor %xmm4, %xmm1
movdqu %xmm1, (%rsi)
addq $16, %rsi
pshufd $0xff, %xmm1, %xmm4
movd %eax, %xmm5
shufps $0x00, %xmm3, %xmm5
shufps $0x08, %xmm3, %xmm5
pxor %xmm4, %xmm3
pxor %xmm5, %xmm3
movq %xmm3, (%rsi)
addq $8, %rsi
ret
.size key_expansion192, .-key_expansion192
/* in %rdi : cx - context
in %rsi : output - pointer to output buffer
in %rdx : outputLen - pointer to variable for length of output
(filled by caller)
in %rcx : maxOutputLen - length of output buffer
in %r8 : input - pointer to input buffer
in %r9 : inputLen - length of input buffer
on stack: blocksize - AES blocksize (always 16, unused)
*/
.type intel_aes_encrypt_ecb_192,@function
.globl intel_aes_encrypt_ecb_192
.align 16
intel_aes_encrypt_ecb_192:
// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi
leaq 48(%rdi), %rdi
movdqu (%rdi), %xmm2
movdqu 192(%rdi), %xmm14
xorl %eax, %eax
// cmpq $8*16, %r9
cmpq $128, %r9
jb 1f
// leaq -8*16(%r9), %r11
leaq -128(%r9), %r11
2: movdqu (%r8, %rax), %xmm3
movdqu 16(%r8, %rax), %xmm4
movdqu 32(%r8, %rax), %xmm5
movdqu 48(%r8, %rax), %xmm6
movdqu 64(%r8, %rax), %xmm7
movdqu 80(%r8, %rax), %xmm8
movdqu 96(%r8, %rax), %xmm9
movdqu 112(%r8, %rax), %xmm10
pxor %xmm2, %xmm3
pxor %xmm2, %xmm4
pxor %xmm2, %xmm5
pxor %xmm2, %xmm6
pxor %xmm2, %xmm7
pxor %xmm2, %xmm8
pxor %xmm2, %xmm9
pxor %xmm2, %xmm10
movq $16, %r10
3: movdqu (%rdi, %r10), %xmm1
.byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
.byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
.byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
.byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
.byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
addq $16, %r10
cmpq $192, %r10
jne 3b
.byte 0x66,0x41,0x0f,0x38,0xdd,0xde /* aesenclast %xmm14, %xmm3 */
.byte 0x66,0x41,0x0f,0x38,0xdd,0xe6 /* aesenclast %xmm14, %xmm4 */
.byte 0x66,0x41,0x0f,0x38,0xdd,0xee /* aesenclast %xmm14, %xmm5 */
.byte 0x66,0x41,0x0f,0x38,0xdd,0xf6 /* aesenclast %xmm14, %xmm7 */
.byte 0x66,0x41,0x0f,0x38,0xdd,0xfe /* aesenclast %xmm14, %xmm3 */
.byte 0x66,0x45,0x0f,0x38,0xdd,0xc6 /* aesenclast %xmm14, %xmm8 */
.byte 0x66,0x45,0x0f,0x38,0xdd,0xce /* aesenclast %xmm14, %xmm9 */
.byte 0x66,0x45,0x0f,0x38,0xdd,0xd6 /* aesenclast %xmm14, %xmm10 */
movdqu %xmm3, (%rsi, %rax)
movdqu %xmm4, 16(%rsi, %rax)
movdqu %xmm5, 32(%rsi, %rax)
movdqu %xmm6, 48(%rsi, %rax)
movdqu %xmm7, 64(%rsi, %rax)
movdqu %xmm8, 80(%rsi, %rax)
movdqu %xmm9, 96(%rsi, %rax)
movdqu %xmm10, 112(%rsi, %rax)
// addq $8*16, %rax
addq $128, %rax
cmpq %r11, %rax
jbe 2b
1: cmpq %rax, %r9
je 5f
movdqu 16(%rdi), %xmm3
movdqu 32(%rdi), %xmm4
movdqu 48(%rdi), %xmm5
movdqu 64(%rdi), %xmm6
movdqu 80(%rdi), %xmm7
movdqu 96(%rdi), %xmm8
movdqu 112(%rdi), %xmm9
movdqu 128(%rdi), %xmm10
movdqu 144(%rdi), %xmm11
movdqu 160(%rdi), %xmm12
movdqu 176(%rdi), %xmm13
4: movdqu (%r8, %rax), %xmm1
pxor %xmm2, %xmm1
.byte 0x66,0x0f,0x38,0xdc,0xcb /* aesenc %xmm3, %xmm1 */
.byte 0x66,0x0f,0x38,0xdc,0xcc /* aesenc %xmm4, %xmm1 */
.byte 0x66,0x0f,0x38,0xdc,0xcd /* aesenc %xmm5, %xmm1 */
.byte 0x66,0x0f,0x38,0xdc,0xce /* aesenc %xmm6, %xmm1 */
.byte 0x66,0x0f,0x38,0xdc,0xcf /* aesenc %xmm7, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xdc,0xcc /* aesenc %xmm12, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xdc,0xcd /* aesenc %xmm13, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xdd,0xce /* aesenclast %xmm14, %xmm1 */
movdqu %xmm1, (%rsi, %rax)
addq $16, %rax
cmpq %rax, %r9
jne 4b
5: xor %eax, %eax
ret
.size intel_aes_encrypt_ecb_192, .-intel_aes_encrypt_ecb_192
/* in %rdi : cx - context
in %rsi : output - pointer to output buffer
in %rdx : outputLen - pointer to variable for length of output
(filled by caller)
in %rcx : maxOutputLen - length of output buffer
in %r8 : input - pointer to input buffer
in %r9 : inputLen - length of input buffer
on stack: blocksize - AES blocksize (always 16, unused)
*/
.type intel_aes_decrypt_ecb_192,@function
.globl intel_aes_decrypt_ecb_192
.align 16
intel_aes_decrypt_ecb_192:
// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi
leaq 48(%rdi), %rdi
movdqu (%rdi), %xmm2
movdqu 192(%rdi), %xmm14
xorl %eax, %eax
// cmpq $8*16, %r9
cmpq $128, %r9
jb 1f
// leaq -8*16(%r9), %r11
leaq -128(%r9), %r11
2: movdqu (%r8, %rax), %xmm3
movdqu 16(%r8, %rax), %xmm4
movdqu 32(%r8, %rax), %xmm5
movdqu 48(%r8, %rax), %xmm6
movdqu 64(%r8, %rax), %xmm7
movdqu 80(%r8, %rax), %xmm8
movdqu 96(%r8, %rax), %xmm9
movdqu 112(%r8, %rax), %xmm10
pxor %xmm14, %xmm3
pxor %xmm14, %xmm4
pxor %xmm14, %xmm5
pxor %xmm14, %xmm6
pxor %xmm14, %xmm7
pxor %xmm14, %xmm8
pxor %xmm14, %xmm9
pxor %xmm14, %xmm10
movq $176, %r10
3: movdqu (%rdi, %r10), %xmm1
.byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
.byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
.byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
.byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
.byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
.byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
.byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
.byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
subq $16, %r10
jne 3b
.byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */
.byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */
.byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */
.byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */
.byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */
.byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */
.byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */
.byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */
movdqu %xmm3, (%rsi, %rax)
movdqu %xmm4, 16(%rsi, %rax)
movdqu %xmm5, 32(%rsi, %rax)
movdqu %xmm6, 48(%rsi, %rax)
movdqu %xmm7, 64(%rsi, %rax)
movdqu %xmm8, 80(%rsi, %rax)
movdqu %xmm9, 96(%rsi, %rax)
movdqu %xmm10, 112(%rsi, %rax)
// addq $8*16, %rax
addq $128, %rax
cmpq %r11, %rax
jbe 2b
1: cmpq %rax, %r9
je 5f
movdqu 16(%rdi), %xmm3
movdqu 32(%rdi), %xmm4
movdqu 48(%rdi), %xmm5
movdqu 64(%rdi), %xmm6
movdqu 80(%rdi), %xmm7
movdqu 96(%rdi), %xmm8
movdqu 112(%rdi), %xmm9
movdqu 128(%rdi), %xmm10
movdqu 144(%rdi), %xmm11
movdqu 160(%rdi), %xmm12
movdqu 176(%rdi), %xmm13
4: movdqu (%r8, %rax), %xmm1
pxor %xmm14, %xmm1
.byte 0x66,0x41,0x0f,0x38,0xde,0xcd /* aesdec %xmm13, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xde,0xcc /* aesdec %xmm12, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */
.byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */
.byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */
.byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */
.byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */
.byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */
.byte 0x66,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm1 */
movdqu %xmm1, (%rsi, %rax)
addq $16, %rax
cmpq %rax, %r9
jne 4b
5: xor %eax, %eax
ret
.size intel_aes_decrypt_ecb_192, .-intel_aes_decrypt_ecb_192
/* in %rdi : cx - context
in %rsi : output - pointer to output buffer
in %rdx : outputLen - pointer to variable for length of output
(filled by caller)
in %rcx : maxOutputLen - length of output buffer
in %r8 : input - pointer to input buffer
in %r9 : inputLen - length of input buffer
on stack: blocksize - AES blocksize (always 16, unused)
*/
.type intel_aes_encrypt_cbc_192,@function
.globl intel_aes_encrypt_cbc_192
.align 16
intel_aes_encrypt_cbc_192:
testq %r9, %r9
je 2f
// leaq IV_OFFSET(%rdi), %rdx
// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi
leaq 16(%rdi), %rdx
leaq 48(%rdi), %rdi
movdqu (%rdx), %xmm0
movdqu (%rdi), %xmm2
movdqu 16(%rdi), %xmm3
movdqu 32(%rdi), %xmm4
movdqu 48(%rdi), %xmm5
movdqu 64(%rdi), %xmm6
movdqu 80(%rdi), %xmm7
movdqu 96(%rdi), %xmm8
movdqu 112(%rdi), %xmm9
movdqu 128(%rdi), %xmm10
movdqu 144(%rdi), %xmm11
movdqu 160(%rdi), %xmm12
movdqu 176(%rdi), %xmm13
movdqu 192(%rdi), %xmm14
xorl %eax, %eax
1: movdqu (%r8, %rax), %xmm1
pxor %xmm0, %xmm1
pxor %xmm2, %xmm1
.byte 0x66,0x0f,0x38,0xdc,0xcb /* aesenc %xmm3, %xmm1 */
.byte 0x66,0x0f,0x38,0xdc,0xcc /* aesenc %xmm4, %xmm1 */
.byte 0x66,0x0f,0x38,0xdc,0xcd /* aesenc %xmm5, %xmm1 */
.byte 0x66,0x0f,0x38,0xdc,0xce /* aesenc %xmm6, %xmm1 */
.byte 0x66,0x0f,0x38,0xdc,0xcf /* aesenc %xmm7, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xdc,0xcc /* aesenc %xmm12, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xdc,0xcd /* aesenc %xmm13, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xdd,0xce /* aesenclast %xmm14, %xmm1 */
movdqu %xmm1, (%rsi, %rax)
movdqa %xmm1, %xmm0
addq $16, %rax
cmpq %rax, %r9
jne 1b
movdqu %xmm0, (%rdx)
2: xor %eax, %eax
ret
.size intel_aes_encrypt_cbc_192, .-intel_aes_encrypt_cbc_192
/* in %rdi : cx - context
in %rsi : output - pointer to output buffer
in %rdx : outputLen - pointer to variable for length of output
(filled by caller)
in %rcx : maxOutputLen - length of output buffer
in %r8 : input - pointer to input buffer
in %r9 : inputLen - length of input buffer
on stack: blocksize - AES blocksize (always 16, unused)
*/
.type intel_aes_decrypt_cbc_192,@function
.globl intel_aes_decrypt_cbc_192
.align 16
intel_aes_decrypt_cbc_192:
leaq 16(%rdi), %rdx
leaq 48(%rdi), %rdi
movdqu (%rdx), %xmm0
movdqu (%rdi), %xmm2
movdqu 192(%rdi), %xmm14
xorl %eax, %eax
cmpq $128, %r9
jb 1f
leaq -128(%r9), %r11
2: movdqu (%r8, %rax), %xmm3
movdqu 16(%r8, %rax), %xmm4
movdqu 32(%r8, %rax), %xmm5
movdqu 48(%r8, %rax), %xmm6
movdqu 64(%r8, %rax), %xmm7
movdqu 80(%r8, %rax), %xmm8
movdqu 96(%r8, %rax), %xmm9
movdqu 112(%r8, %rax), %xmm10
pxor %xmm14, %xmm3
pxor %xmm14, %xmm4
pxor %xmm14, %xmm5
pxor %xmm14, %xmm6
pxor %xmm14, %xmm7
pxor %xmm14, %xmm8
pxor %xmm14, %xmm9
pxor %xmm14, %xmm10
movq $176, %r10
3: movdqu (%rdi, %r10), %xmm1
.byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
.byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
.byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
.byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
.byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
.byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
.byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
.byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
subq $16, %r10
jne 3b
.byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */
.byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */
.byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */
.byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */
.byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */
.byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */
.byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */
.byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */
pxor %xmm0, %xmm3
movdqu (%r8, %rax), %xmm0
pxor %xmm0, %xmm4
movdqu 16(%r8, %rax), %xmm0
pxor %xmm0, %xmm5
movdqu 32(%r8, %rax), %xmm0
pxor %xmm0, %xmm6
movdqu 48(%r8, %rax), %xmm0
pxor %xmm0, %xmm7
movdqu 64(%r8, %rax), %xmm0
pxor %xmm0, %xmm8
movdqu 80(%r8, %rax), %xmm0
pxor %xmm0, %xmm9
movdqu 96(%r8, %rax), %xmm0
pxor %xmm0, %xmm10
movdqu 112(%r8, %rax), %xmm0
movdqu %xmm3, (%rsi, %rax)
movdqu %xmm4, 16(%rsi, %rax)
movdqu %xmm5, 32(%rsi, %rax)
movdqu %xmm6, 48(%rsi, %rax)
movdqu %xmm7, 64(%rsi, %rax)
movdqu %xmm8, 80(%rsi, %rax)
movdqu %xmm9, 96(%rsi, %rax)
movdqu %xmm10, 112(%rsi, %rax)
addq $128, %rax
cmpq %r11, %rax
jbe 2b
1: cmpq %rax, %r9
je 5f
movdqu 16(%rdi), %xmm3
movdqu 32(%rdi), %xmm4
movdqu 48(%rdi), %xmm5
movdqu 64(%rdi), %xmm6
movdqu 80(%rdi), %xmm7
movdqu 96(%rdi), %xmm8
movdqu 112(%rdi), %xmm9
movdqu 128(%rdi), %xmm10
movdqu 144(%rdi), %xmm11
movdqu 160(%rdi), %xmm12
movdqu 176(%rdi), %xmm13
4: movdqu (%r8, %rax), %xmm1
movdqa %xmm1, %xmm15
pxor %xmm14, %xmm1
.byte 0x66,0x41,0x0f,0x38,0xde,0xcd /* aesdec %xmm13, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xde,0xcc /* aesdec %xmm12, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */
.byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */
.byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */
.byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */
.byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */
.byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */
.byte 0x66,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm1 */
pxor %xmm0, %xmm1
movdqu %xmm1, (%rsi, %rax)
movdqa %xmm15, %xmm0
addq $16, %rax
cmpq %rax, %r9
jne 4b
5: movdqu %xmm0, (%rdx)
xor %eax, %eax
ret
.size intel_aes_decrypt_cbc_192, .-intel_aes_decrypt_cbc_192
/* in %rdi : the key
in %rsi : buffer for expanded key
*/
.type intel_aes_encrypt_init_256,@function
.globl intel_aes_encrypt_init_256
.align 16
intel_aes_encrypt_init_256:
movdqu (%rdi), %xmm1
movdqu 16(%rdi), %xmm3
movdqu %xmm1, (%rsi)
movdqu %xmm3, 16(%rsi)
leaq 32(%rsi), %rsi
xor %eax, %eax
.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01 /* aeskeygenassist $0x01, %xmm3, %xmm2 */
call key_expansion256
.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02 /* aeskeygenassist $0x02, %xmm3, %xmm2 */
call key_expansion256
.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04 /* aeskeygenassist $0x04, %xmm3, %xmm2 */
call key_expansion256
.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08 /* aeskeygenassist $0x08, %xmm3, %xmm2 */
call key_expansion256
.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10 /* aeskeygenassist $0x10, %xmm3, %xmm2 */
call key_expansion256
.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20 /* aeskeygenassist $0x20, %xmm3, %xmm2 */
call key_expansion256
.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40 /* aeskeygenassist $0x40, %xmm3, %xmm2 */
pxor %xmm6, %xmm6
pshufd $0xff, %xmm2, %xmm2
shufps $0x10, %xmm1, %xmm6
pxor %xmm6, %xmm1
shufps $0x8c, %xmm1, %xmm6
pxor %xmm2, %xmm1
pxor %xmm6, %xmm1
movdqu %xmm1, (%rsi)
ret
.size intel_aes_encrypt_init_256, .-intel_aes_encrypt_init_256
/* in %rdi : the key
in %rsi : buffer for expanded key
*/
.type intel_aes_decrypt_init_256,@function
.globl intel_aes_decrypt_init_256
.align 16
intel_aes_decrypt_init_256:
movdqu (%rdi), %xmm1
movdqu 16(%rdi), %xmm3
movdqu %xmm1, (%rsi)
.byte 0x66,0x0f,0x38,0xdb,0xe3 /* aesimc %xmm3, %xmm4 */
movdqu %xmm4, 16(%rsi)
leaq 32(%rsi), %rsi
xor %eax, %eax
.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01 /* aeskeygenassist $0x01, %xmm3, %xmm2 */
call key_expansion256
.byte 0x66,0x0f,0x38,0xdb,0xe1 /* aesimc %xmm1, %xmm4 */
.byte 0x66,0x0f,0x38,0xdb,0xeb /* aesimc %xmm3, %xmm5 */
movdqu %xmm4, -32(%rsi)
movdqu %xmm5, -16(%rsi)
.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02 /* aeskeygenassist $0x02, %xmm3, %xmm2 */
call key_expansion256
.byte 0x66,0x0f,0x38,0xdb,0xe1 /* aesimc %xmm1, %xmm4 */
.byte 0x66,0x0f,0x38,0xdb,0xeb /* aesimc %xmm3, %xmm5 */
movdqu %xmm4, -32(%rsi)
movdqu %xmm5, -16(%rsi)
.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04 /* aeskeygenassist $0x04, %xmm3, %xmm2 */
call key_expansion256
.byte 0x66,0x0f,0x38,0xdb,0xe1 /* aesimc %xmm1, %xmm4 */
.byte 0x66,0x0f,0x38,0xdb,0xeb /* aesimc %xmm3, %xmm5 */
movdqu %xmm4, -32(%rsi)
movdqu %xmm5, -16(%rsi)
.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08 /* aeskeygenassist $0x08, %xmm3, %xmm2 */
call key_expansion256
.byte 0x66,0x0f,0x38,0xdb,0xe1 /* aesimc %xmm1, %xmm4 */
.byte 0x66,0x0f,0x38,0xdb,0xeb /* aesimc %xmm3, %xmm5 */
movdqu %xmm4, -32(%rsi)
movdqu %xmm5, -16(%rsi)
.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10 /* aeskeygenassist $0x10, %xmm3, %xmm2 */
call key_expansion256
.byte 0x66,0x0f,0x38,0xdb,0xe1 /* aesimc %xmm1, %xmm4 */
.byte 0x66,0x0f,0x38,0xdb,0xeb /* aesimc %xmm3, %xmm5 */
movdqu %xmm4, -32(%rsi)
movdqu %xmm5, -16(%rsi)
.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20 /* aeskeygenassist $0x20, %xmm3, %xmm2 */
call key_expansion256
.byte 0x66,0x0f,0x38,0xdb,0xe1 /* aesimc %xmm1, %xmm4 */
.byte 0x66,0x0f,0x38,0xdb,0xeb /* aesimc %xmm3, %xmm5 */
movdqu %xmm4, -32(%rsi)
movdqu %xmm5, -16(%rsi)
.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40 /* aeskeygenassist $0x40, %xmm3, %xmm2 */
pxor %xmm6, %xmm6
pshufd $0xff, %xmm2, %xmm2
shufps $0x10, %xmm1, %xmm6
pxor %xmm6, %xmm1
shufps $0x8c, %xmm1, %xmm6
pxor %xmm2, %xmm1
pxor %xmm6, %xmm1
movdqu %xmm1, (%rsi)
ret
.size intel_aes_decrypt_init_256, .-intel_aes_decrypt_init_256
.type key_expansion256,@function
.align 16
key_expansion256:
movd %eax, %xmm6
pshufd $0xff, %xmm2, %xmm2
shufps $0x10, %xmm1, %xmm6
pxor %xmm6, %xmm1
shufps $0x8c, %xmm1, %xmm6
pxor %xmm2, %xmm1
pxor %xmm6, %xmm1
movdqu %xmm1, (%rsi)
addq $16, %rsi
.byte 0x66,0x0f,0x3a,0xdf,0xe1,0x00 /* aeskeygenassist $0, %xmm1, %xmm4 */
pshufd $0xaa, %xmm4, %xmm4
shufps $0x10, %xmm3, %xmm6
pxor %xmm6, %xmm3
shufps $0x8c, %xmm3, %xmm6
pxor %xmm4, %xmm3
pxor %xmm6, %xmm3
movdqu %xmm3, (%rsi)
addq $16, %rsi
ret
.size key_expansion256, .-key_expansion256
/* in %rdi : cx - context
in %rsi : output - pointer to output buffer
in %rdx : outputLen - pointer to variable for length of output
(filled by caller)
in %rcx : maxOutputLen - length of output buffer
in %r8 : input - pointer to input buffer
in %r9 : inputLen - length of input buffer
on stack: blocksize - AES blocksize (always 16, unused)
*/
.type intel_aes_encrypt_ecb_256,@function
.globl intel_aes_encrypt_ecb_256
.align 16
intel_aes_encrypt_ecb_256:
// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi
leaq 48(%rdi), %rdi
movdqu (%rdi), %xmm2
movdqu 224(%rdi), %xmm15
xorl %eax, %eax
// cmpq $8*16, %r9
cmpq $128, %r9
jb 1f
// leaq -8*16(%r9), %r11
leaq -128(%r9), %r11
2: movdqu (%r8, %rax), %xmm3
movdqu 16(%r8, %rax), %xmm4
movdqu 32(%r8, %rax), %xmm5
movdqu 48(%r8, %rax), %xmm6
movdqu 64(%r8, %rax), %xmm7
movdqu 80(%r8, %rax), %xmm8
movdqu 96(%r8, %rax), %xmm9
movdqu 112(%r8, %rax), %xmm10
pxor %xmm2, %xmm3
pxor %xmm2, %xmm4
pxor %xmm2, %xmm5
pxor %xmm2, %xmm6
pxor %xmm2, %xmm7
pxor %xmm2, %xmm8
pxor %xmm2, %xmm9
pxor %xmm2, %xmm10
movq $16, %r10
3: movdqu (%rdi, %r10), %xmm1
.byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
.byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
.byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
.byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
.byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
addq $16, %r10
cmpq $224, %r10
jne 3b
.byte 0x66,0x41,0x0f,0x38,0xdd,0xdf /* aesenclast %xmm15, %xmm3 */
.byte 0x66,0x41,0x0f,0x38,0xdd,0xe7 /* aesenclast %xmm15, %xmm4 */
.byte 0x66,0x41,0x0f,0x38,0xdd,0xef /* aesenclast %xmm15, %xmm5 */
.byte 0x66,0x41,0x0f,0x38,0xdd,0xf7 /* aesenclast %xmm15, %xmm6 */
.byte 0x66,0x41,0x0f,0x38,0xdd,0xff /* aesenclast %xmm15, %xmm7 */
.byte 0x66,0x45,0x0f,0x38,0xdd,0xc7 /* aesenclast %xmm15, %xmm8 */
.byte 0x66,0x45,0x0f,0x38,0xdd,0xcf /* aesenclast %xmm15, %xmm9 */
.byte 0x66,0x45,0x0f,0x38,0xdd,0xd7 /* aesenclast %xmm15, %xmm10 */
movdqu %xmm3, (%rsi, %rax)
movdqu %xmm4, 16(%rsi, %rax)
movdqu %xmm5, 32(%rsi, %rax)
movdqu %xmm6, 48(%rsi, %rax)
movdqu %xmm7, 64(%rsi, %rax)
movdqu %xmm8, 80(%rsi, %rax)
movdqu %xmm9, 96(%rsi, %rax)
movdqu %xmm10, 112(%rsi, %rax)
// addq $8*16, %rax
addq $128, %rax
cmpq %r11, %rax
jbe 2b
1: cmpq %rax, %r9
je 5f
movdqu (%rdi), %xmm8
movdqu 16(%rdi), %xmm2
movdqu 32(%rdi), %xmm3
movdqu 48(%rdi), %xmm4
movdqu 64(%rdi), %xmm5
movdqu 80(%rdi), %xmm6
movdqu 96(%rdi), %xmm7
movdqu 128(%rdi), %xmm9
movdqu 144(%rdi), %xmm10
movdqu 160(%rdi), %xmm11
movdqu 176(%rdi), %xmm12
movdqu 192(%rdi), %xmm13
movdqu 208(%rdi), %xmm14
4: movdqu (%r8, %rax), %xmm1
pxor %xmm8, %xmm1
movdqu 112(%rdi), %xmm8
.byte 0x66,0x0f,0x38,0xdc,0xca /* aesenc %xmm2, %xmm1 */
.byte 0x66,0x0f,0x38,0xdc,0xcb /* aesenc %xmm3, %xmm1 */
.byte 0x66,0x0f,0x38,0xdc,0xcc /* aesenc %xmm4, %xmm1 */
.byte 0x66,0x0f,0x38,0xdc,0xcd /* aesenc %xmm5, %xmm1 */
.byte 0x66,0x0f,0x38,0xdc,0xce /* aesenc %xmm6, %xmm1 */
.byte 0x66,0x0f,0x38,0xdc,0xcf /* aesenc %xmm7, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */
movdqu (%rdi), %xmm8
.byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xdc,0xcc /* aesenc %xmm12, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xdc,0xcd /* aesenc %xmm13, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xdc,0xce /* aesenc %xmm14, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xdd,0xcf /* aesenclast %xmm15, %xmm1 */
movdqu %xmm1, (%rsi, %rax)
addq $16, %rax
cmpq %rax, %r9
jne 4b
5: xor %eax, %eax
ret
.size intel_aes_encrypt_ecb_256, .-intel_aes_encrypt_ecb_256
/* in %rdi : cx - context
in %rsi : output - pointer to output buffer
in %rdx : outputLen - pointer to variable for length of output
(filled by caller)
in %rcx : maxOutputLen - length of output buffer
in %r8 : input - pointer to input buffer
in %r9 : inputLen - length of input buffer
on stack: blocksize - AES blocksize (always 16, unused)
*/
.type intel_aes_decrypt_ecb_256,@function
.globl intel_aes_decrypt_ecb_256
.align 16
intel_aes_decrypt_ecb_256:
// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi
leaq 48(%rdi), %rdi
movdqu (%rdi), %xmm2
movdqu 224(%rdi), %xmm15
xorl %eax, %eax
// cmpq $8*16, %r9
cmpq $128, %r9
jb 1f
// leaq -8*16(%r9), %r11
leaq -128(%r9), %r11
2: movdqu (%r8, %rax), %xmm3
movdqu 16(%r8, %rax), %xmm4
movdqu 32(%r8, %rax), %xmm5
movdqu 48(%r8, %rax), %xmm6
movdqu 64(%r8, %rax), %xmm7
movdqu 80(%r8, %rax), %xmm8
movdqu 96(%r8, %rax), %xmm9
movdqu 112(%r8, %rax), %xmm10
pxor %xmm15, %xmm3
pxor %xmm15, %xmm4
pxor %xmm15, %xmm5
pxor %xmm15, %xmm6
pxor %xmm15, %xmm7
pxor %xmm15, %xmm8
pxor %xmm15, %xmm9
pxor %xmm15, %xmm10
movq $208, %r10
3: movdqu (%rdi, %r10), %xmm1
.byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
.byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
.byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
.byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
.byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
.byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
.byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
.byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
subq $16, %r10
jne 3b
.byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */
.byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */
.byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */
.byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */
.byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */
.byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */
.byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */
.byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */
movdqu %xmm3, (%rsi, %rax)
movdqu %xmm4, 16(%rsi, %rax)
movdqu %xmm5, 32(%rsi, %rax)
movdqu %xmm6, 48(%rsi, %rax)
movdqu %xmm7, 64(%rsi, %rax)
movdqu %xmm8, 80(%rsi, %rax)
movdqu %xmm9, 96(%rsi, %rax)
movdqu %xmm10, 112(%rsi, %rax)
// addq $8*16, %rax
addq $128, %rax
cmpq %r11, %rax
jbe 2b
1: cmpq %rax, %r9
je 5f
movdqu 16(%rdi), %xmm2
movdqu 32(%rdi), %xmm3
movdqu 48(%rdi), %xmm4
movdqu 64(%rdi), %xmm5
movdqu 80(%rdi), %xmm6
movdqu 96(%rdi), %xmm7
movdqu 112(%rdi), %xmm8
movdqu 128(%rdi), %xmm9
movdqu 144(%rdi), %xmm10
movdqu 160(%rdi), %xmm11
movdqu 176(%rdi), %xmm12
movdqu 192(%rdi), %xmm13
movdqu 208(%rdi), %xmm14
4: movdqu (%r8, %rax), %xmm1
pxor %xmm15, %xmm1
.byte 0x66,0x41,0x0f,0x38,0xde,0xce /* aesdec %xmm14, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xde,0xcd /* aesdec %xmm13, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xde,0xcc /* aesdec %xmm12, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */
movdqu (%rdi), %xmm8
.byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */
.byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */
.byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */
.byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */
.byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */
.byte 0x66,0x0f,0x38,0xde,0xca /* aesdec %xmm2, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xdf,0xc8 /* aesdeclast %xmm8, %xmm1 */
movdqu 112(%rdi), %xmm8
movdqu %xmm1, (%rsi, %rax)
addq $16, %rax
cmpq %rax, %r9
jne 4b
5: xor %eax, %eax
ret
.size intel_aes_decrypt_ecb_256, .-intel_aes_decrypt_ecb_256
/* in %rdi : cx - context
in %rsi : output - pointer to output buffer
in %rdx : outputLen - pointer to variable for length of output
(filled by caller)
in %rcx : maxOutputLen - length of output buffer
in %r8 : input - pointer to input buffer
in %r9 : inputLen - length of input buffer
on stack: blocksize - AES blocksize (always 16, unused)
*/
.type intel_aes_encrypt_cbc_256,@function
.globl intel_aes_encrypt_cbc_256
.align 16
intel_aes_encrypt_cbc_256:
testq %r9, %r9
je 2f
// leaq IV_OFFSET(%rdi), %rdx
// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi
leaq 16(%rdi), %rdx
leaq 48(%rdi), %rdi
movdqu (%rdx), %xmm0
movdqu (%rdi), %xmm8
movdqu 16(%rdi), %xmm2
movdqu 32(%rdi), %xmm3
movdqu 48(%rdi), %xmm4
movdqu 64(%rdi), %xmm5
movdqu 80(%rdi), %xmm6
movdqu 96(%rdi), %xmm7
movdqu 128(%rdi), %xmm9
movdqu 144(%rdi), %xmm10
movdqu 160(%rdi), %xmm11
movdqu 176(%rdi), %xmm12
movdqu 192(%rdi), %xmm13
movdqu 208(%rdi), %xmm14
movdqu 224(%rdi), %xmm15
xorl %eax, %eax
1: movdqu (%r8, %rax), %xmm1
pxor %xmm0, %xmm1
pxor %xmm8, %xmm1
movdqu 112(%rdi), %xmm8
.byte 0x66,0x0f,0x38,0xdc,0xca /* aesenc %xmm2, %xmm1 */
.byte 0x66,0x0f,0x38,0xdc,0xcb /* aesenc %xmm3, %xmm1 */
.byte 0x66,0x0f,0x38,0xdc,0xcc /* aesenc %xmm4, %xmm1 */
.byte 0x66,0x0f,0x38,0xdc,0xcd /* aesenc %xmm5, %xmm1 */
.byte 0x66,0x0f,0x38,0xdc,0xce /* aesenc %xmm6, %xmm1 */
.byte 0x66,0x0f,0x38,0xdc,0xcf /* aesenc %xmm7, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */
movdqu (%rdi), %xmm8
.byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xdc,0xcc /* aesenc %xmm12, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xdc,0xcd /* aesenc %xmm13, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xdc,0xce /* aesenc %xmm14, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xdd,0xcf /* aesenclast %xmm15, %xmm1 */
movdqu %xmm1, (%rsi, %rax)
movdqa %xmm1, %xmm0
addq $16, %rax
cmpq %rax, %r9
jne 1b
movdqu %xmm0, (%rdx)
2: xor %eax, %eax
ret
.size intel_aes_encrypt_cbc_256, .-intel_aes_encrypt_cbc_256
/* in %rdi : cx - context
in %rsi : output - pointer to output buffer
in %rdx : outputLen - pointer to variable for length of output
(filled by caller)
in %rcx : maxOutputLen - length of output buffer
in %r8 : input - pointer to input buffer
in %r9 : inputLen - length of input buffer
on stack: blocksize - AES blocksize (always 16, unused)
*/
.type intel_aes_decrypt_cbc_256,@function
.globl intel_aes_decrypt_cbc_256
.align 16
intel_aes_decrypt_cbc_256:
// leaq IV_OFFSET(%rdi), %rdx
// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi
leaq 16(%rdi), %rdx
leaq 48(%rdi), %rdi
movdqu (%rdx), %xmm0
movdqu (%rdi), %xmm2
movdqu 224(%rdi), %xmm15
xorl %eax, %eax
// cmpq $8*16, %r9
cmpq $128, %r9
jb 1f
// leaq -8*16(%r9), %r11
leaq -128(%r9), %r11
2: movdqu (%r8, %rax), %xmm3
movdqu 16(%r8, %rax), %xmm4
movdqu 32(%r8, %rax), %xmm5
movdqu 48(%r8, %rax), %xmm6
movdqu 64(%r8, %rax), %xmm7
movdqu 80(%r8, %rax), %xmm8
movdqu 96(%r8, %rax), %xmm9
movdqu 112(%r8, %rax), %xmm10
pxor %xmm15, %xmm3
pxor %xmm15, %xmm4
pxor %xmm15, %xmm5
pxor %xmm15, %xmm6
pxor %xmm15, %xmm7
pxor %xmm15, %xmm8
pxor %xmm15, %xmm9
pxor %xmm15, %xmm10
movq $208, %r10
3: movdqu (%rdi, %r10), %xmm1
.byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
.byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
.byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
.byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
.byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
.byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
.byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
.byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
subq $16, %r10
jne 3b
.byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */
.byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */
.byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */
.byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */
.byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */
.byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */
.byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */
.byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */
pxor %xmm0, %xmm3
movdqu (%r8, %rax), %xmm0
pxor %xmm0, %xmm4
movdqu 16(%r8, %rax), %xmm0
pxor %xmm0, %xmm5
movdqu 32(%r8, %rax), %xmm0
pxor %xmm0, %xmm6
movdqu 48(%r8, %rax), %xmm0
pxor %xmm0, %xmm7
movdqu 64(%r8, %rax), %xmm0
pxor %xmm0, %xmm8
movdqu 80(%r8, %rax), %xmm0
pxor %xmm0, %xmm9
movdqu 96(%r8, %rax), %xmm0
pxor %xmm0, %xmm10
movdqu 112(%r8, %rax), %xmm0
movdqu %xmm3, (%rsi, %rax)
movdqu %xmm4, 16(%rsi, %rax)
movdqu %xmm5, 32(%rsi, %rax)
movdqu %xmm6, 48(%rsi, %rax)
movdqu %xmm7, 64(%rsi, %rax)
movdqu %xmm8, 80(%rsi, %rax)
movdqu %xmm9, 96(%rsi, %rax)
movdqu %xmm10, 112(%rsi, %rax)
// addq $8*16, %rax
addq $128, %rax
cmpq %r11, %rax
jbe 2b
1: cmpq %rax, %r9
je 5f
movdqu 16(%rdi), %xmm2
movdqu 32(%rdi), %xmm3
movdqu 48(%rdi), %xmm4
movdqu 64(%rdi), %xmm5
movdqu 80(%rdi), %xmm6
movdqu 96(%rdi), %xmm7
movdqu 112(%rdi), %xmm8
movdqu 128(%rdi), %xmm9
movdqu 144(%rdi), %xmm10
movdqu 160(%rdi), %xmm11
movdqu 176(%rdi), %xmm12
movdqu 192(%rdi), %xmm13
movdqu 208(%rdi), %xmm14
4: movdqu (%r8, %rax), %xmm1
pxor %xmm15, %xmm1
.byte 0x66,0x41,0x0f,0x38,0xde,0xce /* aesdec %xmm14, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xde,0xcd /* aesdec %xmm13, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xde,0xcc /* aesdec %xmm12, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */
movdqu (%rdi), %xmm8
.byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */
.byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */
.byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */
.byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */
.byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */
.byte 0x66,0x0f,0x38,0xde,0xca /* aesdec %xmm2, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xdf,0xc8 /* aesdeclast %xmm8, %xmm1 */
movdqu 112(%rdi), %xmm8
pxor %xmm0, %xmm1
movdqu (%r8, %rax), %xmm0 /* fetch the IV before we store the block */
movdqu %xmm1, (%rsi, %rax) /* in case input buf = output buf */
addq $16, %rax
cmpq %rax, %r9
jne 4b
5: movdqu %xmm0, (%rdx)
xor %eax, %eax
ret
.size intel_aes_decrypt_cbc_256, .-intel_aes_decrypt_cbc_256