nss/lib/freebl/intel-aes-x86-masm.asm - Issue 214183004: Implement AES in different modes of operation, using AES-NI and

Unified Diff: nss/lib/freebl/intel-aes-x86-masm.asm

Issue 214183004: Implement AES in different modes of operation, using AES-NI and (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/nss.git@master

Patch Set: Remove an assertion. ctr->cipher doesn't set *outlen. Created 6 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: nss/lib/freebl/intel-aes-x86-masm.asm

diff --git a/nss/lib/freebl/intel-aes-x86-masm.asm b/nss/lib/freebl/intel-aes-x86-masm.asm

new file mode 100644

index 0000000000000000000000000000000000000000..7d805e7660f15d20f89911424dc83dbb7d906dca

--- /dev/null

+++ b/nss/lib/freebl/intel-aes-x86-masm.asm

@@ -0,0 +1,949 @@

+; LICENSE:

+; This submission to NSS is to be made available under the terms of the

+; Mozilla Public License, v. 2.0. You can obtain one at

+; http://mozilla.org/MPL/2.0/.

+;###############################################################################

+; Developers and authors:

+; Shay Gueron and Vlad Krasnov

+; Intel Corporation, Israel Development Centre, Haifa, Israel

+; Please send feedback directly to crypto.feedback.alias@intel.com

+.MODEL FLAT, C

+.XMM

+.DATA

+ALIGN 16

+; 16-byte constants for the key-expansion procedures below.

+; Lmask, Lmask192 and Lmask256 are pshufb control masks; Lcon1 and Lcon2 are

+; loaded into xmm0 and used as the second operand of aesenclast.

+Lmask dd 0c0f0e0dh,0c0f0e0dh,0c0f0e0dh,0c0f0e0dh

+Lmask192 dd 004070605h, 004070605h, 004070605h, 004070605h

+Lmask256 dd 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh

+Lcon1 dd 1,1,1,1

+Lcon2 dd 1bh,1bh,1bh,1bh

+.CODE

+; Register aliases shared by the ECB, CBC and CTR code generators.

+ctx textequ <ecx>

+output textequ <edx>

+input textequ <eax>

+inputLen textequ <edi>

+; aes_rnd i

+; Loads the 16-byte round key at [i*16 + ctx] into xmm7 and applies one

+; aesenc with it to each of the seven block registers xmm0..xmm6.

+aes_rnd MACRO i

+ movdqu xmm7, [i*16 + ctx]

+ FOR blk, <xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6>

+ aesenc blk, xmm7

+ ENDM

+ ENDM

+; aes_last_rnd i

+; Loads the 16-byte round key at [i*16 + ctx] into xmm7 and applies

+; aesenclast with it to each of the seven block registers xmm0..xmm6.

+aes_last_rnd MACRO i

+ movdqu xmm7, [i*16 + ctx]

+ FOR blk, <xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6>

+ aesenclast blk, xmm7

+ ENDM

+ ENDM

+; aes_dec_rnd i

+; Loads the 16-byte round key at [i*16 + ctx] into xmm7 and applies one

+; aesdec with it to each of the seven block registers xmm0..xmm6.

+aes_dec_rnd MACRO i

+ movdqu xmm7, [i*16 + ctx]

+ FOR blk, <xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6>

+ aesdec blk, xmm7

+ ENDM

+ ENDM

+; aes_dec_last_rnd i

+; Loads the 16-byte round key at [i*16 + ctx] into xmm7 and applies

+; aesdeclast with it to each of the seven block registers xmm0..xmm6.

+aes_dec_last_rnd MACRO i

+ movdqu xmm7, [i*16 + ctx]

+ FOR blk, <xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6>

+ aesdeclast blk, xmm7

+ ENDM

+ ENDM

+; gen_aes_ecb_func enc, rnds

+; Emits the body of an ECB procedure, including its ret.

+; enc = 1 selects aesenc and aesenclast; any other value selects aesdec

+; and aesdeclast. rnds is the index of the last round key.

+; Stack arguments read: slot 0 = context pointer, slot 1 = output pointer,

+; slot 4 = input pointer, slot 5 = input length in bytes. Slots 2 and 3

+; are not read here.

+; Whole 16-byte blocks are processed, seven at a time while at least 7*16

+; bytes remain and then one at a time; a tail shorter than 16 bytes is not

+; touched. eax is zero on return; edi is saved and restored.

+gen_aes_ecb_func MACRO enc, rnds

+LOCAL loop7

+LOCAL loop1

+LOCAL bail

+ push inputLen ; edi serves as the byte counter and is restored at bail

+ mov ctx, [esp + 2*4 + 0*4] ; 2*4 skips the saved edi and the return address

+ mov output, [esp + 2*4 + 1*4]

+ mov input, [esp + 2*4 + 4*4]

+ mov inputLen, [esp + 2*4 + 5*4]

+ lea ctx, [44+ctx] ; round keys are read at 16-byte strides from context + 44

+loop7:

+ cmp inputLen, 7*16

+ jb loop1 ; fewer than seven blocks left: continue one block at a time

+ movdqu xmm0, [0*16 + input]

+ movdqu xmm1, [1*16 + input]

+ movdqu xmm2, [2*16 + input]

+ movdqu xmm3, [3*16 + input]

+ movdqu xmm4, [4*16 + input]

+ movdqu xmm5, [5*16 + input]

+ movdqu xmm6, [6*16 + input]

+ movdqu xmm7, [0*16 + ctx] ; round key 0 is XORed into all seven blocks

+ pxor xmm0, xmm7

+ pxor xmm1, xmm7

+ pxor xmm2, xmm7

+ pxor xmm3, xmm7

+ pxor xmm4, xmm7

+ pxor xmm5, xmm7

+ pxor xmm6, xmm7

+; Assembly-time selection of the round macros and instructions; emits no code.

+IF enc eq 1

+ rnd textequ <aes_rnd>

+ lastrnd textequ <aes_last_rnd>

+ aesinst textequ <aesenc>

+ aeslastinst textequ <aesenclast>

+ELSE

+ rnd textequ <aes_dec_rnd>

+ lastrnd textequ <aes_dec_last_rnd>

+ aesinst textequ <aesdec>

+ aeslastinst textequ <aesdeclast>

+ENDIF

+ i = 1

+ WHILE i LT rnds

+ rnd i

+ i = i+1

+ ENDM

+ lastrnd rnds

+ movdqu [0*16 + output], xmm0

+ movdqu [1*16 + output], xmm1

+ movdqu [2*16 + output], xmm2

+ movdqu [3*16 + output], xmm3

+ movdqu [4*16 + output], xmm4

+ movdqu [5*16 + output], xmm5

+ movdqu [6*16 + output], xmm6

+ lea input, [7*16 + input]

+ lea output, [7*16 + output]

+ sub inputLen, 7*16

+ jmp loop7

+loop1:

+ cmp inputLen, 1*16

+ jb bail ; less than one whole block left

+ movdqu xmm0, [input]

+ movdqu xmm7, [0*16 + ctx]

+ pxor xmm0, xmm7

+ i = 1

+ WHILE i LT rnds

+ movdqu xmm7, [i*16 + ctx]

+ aesinst xmm0, xmm7

+ i = i+1

+ ENDM

+ movdqu xmm7, [rnds*16 + ctx]

+ aeslastinst xmm0, xmm7

+ movdqu [output], xmm0

+ lea input, [1*16 + input]

+ lea output, [1*16 + output]

+ sub inputLen, 1*16

+ jmp loop1

+bail:

+ xor eax, eax ; return value 0

+ pop inputLen

+ ret

+ENDM

+; ECB entry points. Each body is generated by gen_aes_ecb_func with

+; enc = 1 for encryption or 0 for decryption, and rnds = 10, 12 or 14.

+ALIGN 16

+intel_aes_encrypt_ecb_128 PROC

+gen_aes_ecb_func 1, 10

+intel_aes_encrypt_ecb_128 ENDP

+ALIGN 16

+intel_aes_encrypt_ecb_192 PROC

+gen_aes_ecb_func 1, 12

+intel_aes_encrypt_ecb_192 ENDP

+ALIGN 16

+intel_aes_encrypt_ecb_256 PROC

+gen_aes_ecb_func 1, 14

+intel_aes_encrypt_ecb_256 ENDP

+ALIGN 16

+intel_aes_decrypt_ecb_128 PROC

+gen_aes_ecb_func 0, 10

+intel_aes_decrypt_ecb_128 ENDP

+ALIGN 16

+intel_aes_decrypt_ecb_192 PROC

+gen_aes_ecb_func 0, 12

+intel_aes_decrypt_ecb_192 ENDP

+ALIGN 16

+intel_aes_decrypt_ecb_256 PROC

+gen_aes_ecb_func 0, 14

+intel_aes_decrypt_ecb_256 ENDP

+; Register aliases used by the key-schedule procedures: KEY = pointer to the

+; cipher key, KS = pointer to the round-key array, ITR = loop counter, also

+; used as a scratch pointer when loading constants.

+KEY textequ <ecx>

+KS textequ <edx>

+ITR textequ <eax>

+ALIGN 16

+; intel_aes_encrypt_init_128

+; Expands a 16-byte key into eleven 16-byte round keys.

+; Stack arguments: first = KEY pointer (16 bytes read), second = KS pointer

+; (11*16 bytes written). Overwrites eax, ecx, edx and xmm0..xmm4.

+intel_aes_encrypt_init_128 PROC

+ mov KEY, [esp + 1*4 + 0*4]

+ mov KS, [esp + 1*4 + 1*4]

+ movdqu xmm1, [KEY]

+ movdqu [KS], xmm1 ; round key 0 is the key itself

+ movdqa xmm2, xmm1

+ lea ITR, Lcon1

+ movdqa xmm0, [ITR] ; round constant, starts at 1 in every dword

+ lea ITR, Lmask

+ movdqa xmm4, [ITR] ; pshufb mask applied to the previous round key

+ mov ITR, 8

+; Round keys 1..8. The zero flag set by dec is consumed by the jne at the

+; bottom of the loop; the instructions in between leave EFLAGS untouched.

+Lenc_128_ks_loop:

+ lea KS, [16 + KS]

+ dec ITR

+ pshufb xmm2, xmm4

+ aesenclast xmm2, xmm0

+ pslld xmm0, 1 ; double the round constant for the next round key

+ movdqa xmm3, xmm1

+ pslldq xmm3, 4 ; three shift-and-XOR steps: each dword of xmm1 becomes

+ pxor xmm1, xmm3 ; the XOR of itself and all lower dwords

+ pslldq xmm3, 4

+ pxor xmm1, xmm3

+ pslldq xmm3, 4

+ pxor xmm1, xmm3

+ pxor xmm1, xmm2

+ movdqu [KS], xmm1

+ movdqa xmm2, xmm1

+ jne Lenc_128_ks_loop

+; Round key 9, with the round constant reloaded as 1bh.

+ lea ITR, Lcon2

+ movdqa xmm0, [ITR]

+ pshufb xmm2, xmm4

+ aesenclast xmm2, xmm0

+ pslld xmm0, 1

+ movdqa xmm3, xmm1

+ pslldq xmm3, 4

+ pxor xmm1, xmm3

+ pslldq xmm3, 4

+ pxor xmm1, xmm3

+ pslldq xmm3, 4

+ pxor xmm1, xmm3

+ pxor xmm1, xmm2

+ movdqu [16 + KS], xmm1

+ movdqa xmm2, xmm1

+; Round key 10.

+ pshufb xmm2, xmm4

+ aesenclast xmm2, xmm0

+ movdqa xmm3, xmm1

+ pslldq xmm3, 4

+ pxor xmm1, xmm3

+ pslldq xmm3, 4

+ pxor xmm1, xmm3

+ pslldq xmm3, 4

+ pxor xmm1, xmm3

+ pxor xmm1, xmm2

+ movdqu [32 + KS], xmm1

+ ret

+intel_aes_encrypt_init_128 ENDP

+ALIGN 16

+; intel_aes_decrypt_init_128

+; Takes the same two stack arguments as intel_aes_encrypt_init_128. Calls

+; that procedure to fill KS, then converts the schedule in place: round

+; keys 0 and 10 are swapped, and round keys 1..9 are reversed in order with

+; aesimc applied to each.

+intel_aes_decrypt_init_128 PROC

+ mov KEY, [esp + 1*4 + 0*4]

+ mov KS, [esp + 1*4 + 1*4]

+ push KS ; second argument for the call

+ push KEY ; first argument for the call

+ call intel_aes_encrypt_init_128

+ pop KEY ; drop the arguments and reload both pointers;

+ pop KS ; the callee advances KS while it runs

+ movdqu xmm0, [0*16 + KS]

+ movdqu xmm1, [10*16 + KS]

+ movdqu [10*16 + KS], xmm0

+ movdqu [0*16 + KS], xmm1

+ i = 1

+ WHILE i LT 5

+ movdqu xmm0, [i*16 + KS]

+ movdqu xmm1, [(10-i)*16 + KS]

+ aesimc xmm0, xmm0

+ aesimc xmm1, xmm1

+ movdqu [(10-i)*16 + KS], xmm0

+ movdqu [i*16 + KS], xmm1

+ i = i+1

+ ENDM

+ movdqu xmm0, [5*16 + KS] ; the middle round key stays in place

+ aesimc xmm0, xmm0

+ movdqu [5*16 + KS], xmm0

+ ret

+intel_aes_decrypt_init_128 ENDP

+ALIGN 16

+; intel_aes_encrypt_init_192

+; Expands a 24-byte key into thirteen 16-byte round keys.

+; Stack arguments: first = KEY pointer (24 bytes read), second = KS pointer

+; (13*16 bytes written; nothing beyond round key 12 is stored).

+; Overwrites eax, ecx, edx and xmm0..xmm7.

+intel_aes_encrypt_init_192 PROC

+ mov KEY, [esp + 1*4 + 0*4]

+ mov KS, [esp + 1*4 + 1*4]

+ pxor xmm3, xmm3

+ movdqu xmm1, [KEY] ; key bytes 0..15

+ pinsrd xmm3, DWORD PTR [16 + KEY], 0 ; key bytes 16..23 in the low half of xmm3

+ pinsrd xmm3, DWORD PTR [20 + KEY], 1

+ movdqu [KS], xmm1 ; round key 0

+ movdqa xmm5, xmm3 ; xmm5 carries the two words that start the next round key

+ lea ITR, Lcon1

+ movdqu xmm0, [ITR]

+ lea ITR, Lmask192

+ movdqu xmm4, [ITR]

+ mov ITR, 4

+; Each iteration derives six words twice and stores three round keys

+; at KS+16, KS+32 and KS+48, then advances KS by 48.

+Lenc_192_ks_loop:

+ movdqa xmm2, xmm3

+ pshufb xmm2, xmm4

+ aesenclast xmm2, xmm0

+ pslld xmm0, 1 ; double the round constant

+ movdqa xmm6, xmm1

+ movdqa xmm7, xmm3

+ pslldq xmm6, 4

+ pslldq xmm7, 4

+ pxor xmm1, xmm6

+ pxor xmm3, xmm7

+ pslldq xmm6, 4

+ pxor xmm1, xmm6

+ pslldq xmm6, 4

+ pxor xmm1, xmm6

+ pxor xmm1, xmm2

+ pshufd xmm2, xmm1, 0ffh ; broadcast the last dword of xmm1

+ pxor xmm3, xmm2

+ movdqa xmm6, xmm1

+ shufpd xmm5, xmm1, 00h ; low half of xmm5 followed by low half of xmm1

+ shufpd xmm6, xmm3, 01h ; high half of xmm1 followed by low half of xmm3

+ movdqu [16 + KS], xmm5

+ movdqu [32 + KS], xmm6

+ movdqa xmm2, xmm3

+ pshufb xmm2, xmm4

+ aesenclast xmm2, xmm0

+ pslld xmm0, 1

+ movdqa xmm6, xmm1

+ movdqa xmm7, xmm3

+ pslldq xmm6, 4

+ pslldq xmm7, 4

+ pxor xmm1, xmm6

+ pxor xmm3, xmm7

+ pslldq xmm6, 4

+ pxor xmm1, xmm6

+ pslldq xmm6, 4

+ pxor xmm1, xmm6

+ pxor xmm1, xmm2

+ pshufd xmm2, xmm1, 0ffh

+ pxor xmm3, xmm2

+ movdqu [48 + KS], xmm1

+ movdqa xmm5, xmm3

+ lea KS, [48 + KS]

+ dec ITR

+ jnz Lenc_192_ks_loop

+ ret

+intel_aes_encrypt_init_192 ENDP

+ALIGN 16

+; intel_aes_decrypt_init_192

+; Takes the same two stack arguments as intel_aes_encrypt_init_192. Calls

+; that procedure to fill KS, then converts the schedule in place: round

+; keys 0 and 12 are swapped, and round keys 1..11 are reversed in order

+; with aesimc applied to each.

+intel_aes_decrypt_init_192 PROC

+ mov KEY, [esp + 1*4 + 0*4]

+ mov KS, [esp + 1*4 + 1*4]

+ push KS ; second argument for the call

+ push KEY ; first argument for the call

+ call intel_aes_encrypt_init_192

+ pop KEY ; drop the arguments and reload both pointers;

+ pop KS ; the callee advances KS while it runs

+ movdqu xmm0, [0*16 + KS]

+ movdqu xmm1, [12*16 + KS]

+ movdqu [12*16 + KS], xmm0

+ movdqu [0*16 + KS], xmm1

+ i = 1

+ WHILE i LT 6

+ movdqu xmm0, [i*16 + KS]

+ movdqu xmm1, [(12-i)*16 + KS]

+ aesimc xmm0, xmm0

+ aesimc xmm1, xmm1

+ movdqu [(12-i)*16 + KS], xmm0

+ movdqu [i*16 + KS], xmm1

+ i = i+1

+ ENDM

+ movdqu xmm0, [6*16 + KS] ; the middle round key stays in place

+ aesimc xmm0, xmm0

+ movdqu [6*16 + KS], xmm0

+ ret

+intel_aes_decrypt_init_192 ENDP

+ALIGN 16

+; intel_aes_encrypt_init_256

+; Expands a 32-byte key into fifteen 16-byte round keys.

+; Stack arguments: first = KEY pointer (32 bytes read), second = KS pointer

+; (15*16 bytes written). Overwrites eax, ecx, edx and xmm0..xmm6.

+intel_aes_encrypt_init_256 PROC

+ mov KEY, [esp + 1*4 + 0*4]

+ mov KS, [esp + 1*4 + 1*4]

+ movdqu xmm1, [16*0 + KEY]

+ movdqu xmm3, [16*1 + KEY]

+ movdqu [16*0 + KS], xmm1 ; round keys 0 and 1 are the key itself

+ movdqu [16*1 + KS], xmm3

+ lea ITR, Lcon1

+ movdqu xmm0, [ITR] ; round constant, starts at 1 in every dword

+ lea ITR, Lmask256

+ movdqu xmm5, [ITR]

+ pxor xmm6, xmm6 ; all-zero operand for the second aesenclast in the loop

+ mov ITR, 6

+; Each iteration stores two round keys at KS+32 and KS+48, then advances

+; KS by 32.

+Lenc_256_ks_loop:

+ movdqa xmm2, xmm3

+ pshufb xmm2, xmm5

+ aesenclast xmm2, xmm0

+ pslld xmm0, 1 ; double the round constant

+ movdqa xmm4, xmm1

+ pslldq xmm4, 4

+ pxor xmm1, xmm4

+ pslldq xmm4, 4

+ pxor xmm1, xmm4

+ pslldq xmm4, 4

+ pxor xmm1, xmm4

+ pxor xmm1, xmm2

+ movdqu [16*2 + KS], xmm1

+ pshufd xmm2, xmm1, 0ffh ; broadcast the last dword of the new round key

+ aesenclast xmm2, xmm6 ; xmm6 is zero, so no constant is XORed in here

+ movdqa xmm4, xmm3

+ pslldq xmm4, 4

+ pxor xmm3, xmm4

+ pslldq xmm4, 4

+ pxor xmm3, xmm4

+ pslldq xmm4, 4

+ pxor xmm3, xmm4

+ pxor xmm3, xmm2

+ movdqu [16*3 + KS], xmm3

+ lea KS, [32 + KS]

+ dec ITR

+ jnz Lenc_256_ks_loop

+; Final round key, stored at KS+32 relative to the advanced KS.

+ movdqa xmm2, xmm3

+ pshufb xmm2, xmm5

+ aesenclast xmm2, xmm0

+ movdqa xmm4, xmm1

+ pslldq xmm4, 4

+ pxor xmm1, xmm4

+ pslldq xmm4, 4

+ pxor xmm1, xmm4

+ pslldq xmm4, 4

+ pxor xmm1, xmm4

+ pxor xmm1, xmm2

+ movdqu [16*2 + KS], xmm1

+ ret

+intel_aes_encrypt_init_256 ENDP

+ALIGN 16

+; intel_aes_decrypt_init_256

+; Takes the same two stack arguments as intel_aes_encrypt_init_256. Calls

+; that procedure to fill KS, then converts the schedule in place: round

+; keys 0 and 14 are swapped, and round keys 1..13 are reversed in order

+; with aesimc applied to each.

+intel_aes_decrypt_init_256 PROC

+ mov KEY, [esp + 1*4 + 0*4]

+ mov KS, [esp + 1*4 + 1*4]

+ push KS ; second argument for the call

+ push KEY ; first argument for the call

+ call intel_aes_encrypt_init_256

+ pop KEY ; drop the arguments and reload both pointers;

+ pop KS ; the callee advances KS while it runs

+ movdqu xmm0, [0*16 + KS]

+ movdqu xmm1, [14*16 + KS]

+ movdqu [14*16 + KS], xmm0

+ movdqu [0*16 + KS], xmm1

+ i = 1

+ WHILE i LT 7

+ movdqu xmm0, [i*16 + KS]

+ movdqu xmm1, [(14-i)*16 + KS]

+ aesimc xmm0, xmm0

+ aesimc xmm1, xmm1

+ movdqu [(14-i)*16 + KS], xmm0

+ movdqu [i*16 + KS], xmm1

+ i = i+1

+ ENDM

+ movdqu xmm0, [7*16 + KS] ; the middle round key stays in place

+ aesimc xmm0, xmm0

+ movdqu [7*16 + KS], xmm0

+ ret

+intel_aes_decrypt_init_256 ENDP

+; gen_aes_cbc_enc_func rnds

+; Emits the body of a CBC encryption procedure, including its ret.

+; rnds is the index of the last round key. The stack arguments are read

+; from the same slots as in gen_aes_ecb_func.

+; The 16-byte chaining value lives 32 bytes below the round keys

+; (context + 12). It is loaded once, replaced by every output block, and

+; written back at bail. Blocks are processed strictly one at a time; a tail

+; shorter than 16 bytes is not touched. eax is zero on return.

+gen_aes_cbc_enc_func MACRO rnds

+LOCAL loop1

+LOCAL bail

+ push inputLen ; edi serves as the byte counter and is restored at bail

+ mov ctx, [esp + 2*4 + 0*4] ; 2*4 skips the saved edi and the return address

+ mov output, [esp + 2*4 + 1*4]

+ mov input, [esp + 2*4 + 4*4]

+ mov inputLen, [esp + 2*4 + 5*4]

+ lea ctx, [44+ctx]

+ movdqu xmm0, [-32+ctx] ; chaining value

+ movdqu xmm2, [0*16 + ctx] ; round keys 0..4 stay in xmm2..xmm6 for the whole loop

+ movdqu xmm3, [1*16 + ctx]

+ movdqu xmm4, [2*16 + ctx]

+ movdqu xmm5, [3*16 + ctx]

+ movdqu xmm6, [4*16 + ctx]

+loop1:

+ cmp inputLen, 1*16

+ jb bail

+ movdqu xmm1, [input]

+ pxor xmm1, xmm2 ; input block XOR round key 0

+ pxor xmm0, xmm1 ; XOR with the previous output block (or the initial chaining value)

+ aesenc xmm0, xmm3

+ aesenc xmm0, xmm4

+ aesenc xmm0, xmm5

+ aesenc xmm0, xmm6

+ i = 5

+ WHILE i LT rnds

+ movdqu xmm7, [i*16 + ctx]

+ aesenc xmm0, xmm7

+ i = i+1

+ ENDM

+ movdqu xmm7, [rnds*16 + ctx]

+ aesenclast xmm0, xmm7

+ movdqu [output], xmm0

+ lea input, [1*16 + input]

+ lea output, [1*16 + output]

+ sub inputLen, 1*16

+ jmp loop1

+bail:

+ movdqu [-32+ctx], xmm0 ; store the last output block as the new chaining value

+ xor eax, eax ; return value 0

+ pop inputLen

+ ret

+ENDM

+; gen_aes_cbc_dec_func rnds

+; Emits the body of a CBC decryption procedure, including its ret.

+; rnds is the index of the last round key. The stack arguments are read

+; from the same slots as in gen_aes_ecb_func.

+; Seven blocks are decrypted per iteration while at least 7*16 bytes

+; remain, then one block at a time. The chaining value at context + 12

+; (32 bytes below the round keys) is updated with the last input block

+; consumed. A tail shorter than 16 bytes is not touched. eax is zero on

+; return.

+gen_aes_cbc_dec_func MACRO rnds

+LOCAL loop7

+LOCAL loop1

+LOCAL dec1

+LOCAL bail

+ push inputLen ; edi serves as the byte counter and is restored at bail

+ mov ctx, [esp + 2*4 + 0*4] ; 2*4 skips the saved edi and the return address

+ mov output, [esp + 2*4 + 1*4]

+ mov input, [esp + 2*4 + 4*4]

+ mov inputLen, [esp + 2*4 + 5*4]

+ lea ctx, [44+ctx]

+loop7:

+ cmp inputLen, 7*16

+ jb dec1

+ movdqu xmm0, [0*16 + input]

+ movdqu xmm1, [1*16 + input]

+ movdqu xmm2, [2*16 + input]

+ movdqu xmm3, [3*16 + input]

+ movdqu xmm4, [4*16 + input]

+ movdqu xmm5, [5*16 + input]

+ movdqu xmm6, [6*16 + input]

+ movdqu xmm7, [0*16 + ctx]

+ pxor xmm0, xmm7

+ pxor xmm1, xmm7

+ pxor xmm2, xmm7

+ pxor xmm3, xmm7

+ pxor xmm4, xmm7

+ pxor xmm5, xmm7

+ pxor xmm6, xmm7

+ i = 1

+ WHILE i LT rnds

+ aes_dec_rnd i

+ i = i+1

+ ENDM

+ aes_dec_last_rnd rnds

+ movdqu xmm7, [-32 + ctx] ; block 0 is XORed with the stored chaining value

+ pxor xmm0, xmm7

+ movdqu xmm7, [0*16 + input] ; block k is XORed with input block k-1

+ pxor xmm1, xmm7

+ movdqu xmm7, [1*16 + input]

+ pxor xmm2, xmm7

+ movdqu xmm7, [2*16 + input]

+ pxor xmm3, xmm7

+ movdqu xmm7, [3*16 + input]

+ pxor xmm4, xmm7

+ movdqu xmm7, [4*16 + input]

+ pxor xmm5, xmm7

+ movdqu xmm7, [5*16 + input]

+ pxor xmm6, xmm7

+ movdqu xmm7, [6*16 + input] ; read the last input block before any output is stored

+ movdqu [0*16 + output], xmm0

+ movdqu [1*16 + output], xmm1

+ movdqu [2*16 + output], xmm2

+ movdqu [3*16 + output], xmm3

+ movdqu [4*16 + output], xmm4

+ movdqu [5*16 + output], xmm5

+ movdqu [6*16 + output], xmm6

+ movdqu [-32 + ctx], xmm7 ; last input block becomes the new chaining value

+ lea input, [7*16 + input]

+ lea output, [7*16 + output]

+ sub inputLen, 7*16

+ jmp loop7

+dec1:

+ movdqu xmm3, [-32 + ctx] ; xmm3 holds the chaining value during the single-block loop

+loop1:

+ cmp inputLen, 1*16

+ jb bail

+ movdqu xmm0, [input]

+ movdqa xmm4, xmm0 ; keep a copy of the input block for the next iteration

+ movdqu xmm7, [0*16 + ctx]

+ pxor xmm0, xmm7

+ i = 1

+ WHILE i LT rnds

+ movdqu xmm7, [i*16 + ctx]

+ aesdec xmm0, xmm7

+ i = i+1

+ ENDM

+ movdqu xmm7, [rnds*16 + ctx]

+ aesdeclast xmm0, xmm7

+ pxor xmm3, xmm0

+ movdqu [output], xmm3

+ movdqa xmm3, xmm4

+ lea input, [1*16 + input]

+ lea output, [1*16 + output]

+ sub inputLen, 1*16

+ jmp loop1

+bail:

+ movdqu [-32 + ctx], xmm3

+ xor eax, eax ; return value 0

+ pop inputLen

+ ret

+ENDM

+; CBC entry points. Encryption bodies come from gen_aes_cbc_enc_func and

+; decryption bodies from gen_aes_cbc_dec_func, with rnds = 10, 12 or 14.

+ALIGN 16

+intel_aes_encrypt_cbc_128 PROC

+gen_aes_cbc_enc_func 10

+intel_aes_encrypt_cbc_128 ENDP

+ALIGN 16

+intel_aes_encrypt_cbc_192 PROC

+gen_aes_cbc_enc_func 12

+intel_aes_encrypt_cbc_192 ENDP

+ALIGN 16

+intel_aes_encrypt_cbc_256 PROC

+gen_aes_cbc_enc_func 14

+intel_aes_encrypt_cbc_256 ENDP

+ALIGN 16

+intel_aes_decrypt_cbc_128 PROC

+gen_aes_cbc_dec_func 10

+intel_aes_decrypt_cbc_128 ENDP

+ALIGN 16

+intel_aes_decrypt_cbc_192 PROC

+gen_aes_cbc_dec_func 12

+intel_aes_decrypt_cbc_192 ENDP

+ALIGN 16

+intel_aes_decrypt_cbc_256 PROC

+gen_aes_cbc_dec_func 14

+intel_aes_decrypt_cbc_256 ENDP

+; Extra register aliases for the CTR generator: ctrCtx first holds the first

+; stack argument and later the running 32-bit counter; CTR is scratch for

+; the byte-swapped counter word.

+ctrCtx textequ <esi>

+CTR textequ <ebx>

+; gen_aes_ctr_func rnds

+; Emits the body of a CTR procedure, including its ret. rnds is the index

+; of the last round key.

+; Stack arguments read: slot 0 = pointer to a counter-mode context, slot 1 =

+; output pointer, slot 4 = input pointer, slot 5 = input length in bytes.

+; From the counter-mode context the code reads a pointer at offset 4 (round

+; keys are at that pointer + 44) and a 16-byte counter block at offset 8.

+; Seven counter blocks, already XORed with round key 0, are kept in a

+; 16-byte-aligned scratch area on the stack. Only the last dword of the

+; counter block is incremented (as a byte-swapped 32-bit value); no carry

+; goes into the other 12 bytes.

+; A tail shorter than 16 bytes is not touched. eax is zero on return; edi,

+; esi, ebx and ebp are saved and restored.

+gen_aes_ctr_func MACRO rnds

+LOCAL loop7

+LOCAL loop1

+LOCAL enc1

+LOCAL bail

+ push inputLen

+ push ctrCtx

+ push CTR

+ push ebp

+ mov ctrCtx, [esp + 4*5 + 0*4] ; 4*5 skips four saved registers and the return address

+ mov output, [esp + 4*5 + 1*4]

+ mov input, [esp + 4*5 + 4*4]

+ mov inputLen, [esp + 4*5 + 5*4]

+ mov ctx, [4+ctrCtx]

+ lea ctx, [44+ctx]

+ mov ebp, esp ; ebp keeps the unaligned esp for the epilogue

+ sub esp, 7*16 ; scratch area for seven counter blocks

+ and esp, -16 ; 16-byte alignment for the movdqa stores below

+ movdqu xmm0, [8+ctrCtx] ; current counter block

+ mov ctrCtx, [ctrCtx + 8 + 3*4] ; last dword of the counter block

+ bswap ctrCtx ; esi now holds the counter as a native integer

+ movdqu xmm1, [ctx + 0*16]

+ pxor xmm0, xmm1 ; fold round key 0 into the counter block

+ movdqa [esp + 0*16], xmm0

+ movdqa [esp + 1*16], xmm0

+ movdqa [esp + 2*16], xmm0

+ movdqa [esp + 3*16], xmm0

+ movdqa [esp + 4*16], xmm0

+ movdqa [esp + 5*16], xmm0

+ movdqa [esp + 6*16], xmm0

+; Patch the last dword of scratch slots 1..6 with counter+1 .. counter+6,

+; each byte-swapped back and XORed with the last dword of round key 0.

+ inc ctrCtx

+ mov CTR, ctrCtx

+ bswap CTR

+ xor CTR, [ctx + 3*4]

+ mov [esp + 1*16 + 3*4], CTR

+ inc ctrCtx

+ mov CTR, ctrCtx

+ bswap CTR

+ xor CTR, [ctx + 3*4]

+ mov [esp + 2*16 + 3*4], CTR

+ inc ctrCtx

+ mov CTR, ctrCtx

+ bswap CTR

+ xor CTR, [ctx + 3*4]

+ mov [esp + 3*16 + 3*4], CTR

+ inc ctrCtx

+ mov CTR, ctrCtx

+ bswap CTR

+ xor CTR, [ctx + 3*4]

+ mov [esp + 4*16 + 3*4], CTR

+ inc ctrCtx

+ mov CTR, ctrCtx

+ bswap CTR

+ xor CTR, [ctx + 3*4]

+ mov [esp + 5*16 + 3*4], CTR

+ inc ctrCtx

+ mov CTR, ctrCtx

+ bswap CTR

+ xor CTR, [ctx + 3*4]

+ mov [esp + 6*16 + 3*4], CTR

+loop7:

+ cmp inputLen, 7*16

+ jb loop1

+ movdqu xmm0, [0*16 + esp]

+ movdqu xmm1, [1*16 + esp]

+ movdqu xmm2, [2*16 + esp]

+ movdqu xmm3, [3*16 + esp]

+ movdqu xmm4, [4*16 + esp]

+ movdqu xmm5, [5*16 + esp]

+ movdqu xmm6, [6*16 + esp]

+; Rounds 1..7 are interleaved with refilling scratch slot i-1 with the

+; next counter value, so the scratch area is ready for the next pass.

+ i = 1

+ WHILE i LE 7

+ aes_rnd i

+ inc ctrCtx

+ mov CTR, ctrCtx

+ bswap CTR

+ xor CTR, [ctx + 3*4]

+ mov [esp + (i-1)*16 + 3*4], CTR

+ i = i+1

+ ENDM

+ WHILE i LT rnds

+ aes_rnd i

+ i = i+1

+ ENDM

+ aes_last_rnd rnds

+ movdqu xmm7, [0*16 + input] ; XOR the encrypted counter blocks with the input blocks

+ pxor xmm0, xmm7

+ movdqu xmm7, [1*16 + input]

+ pxor xmm1, xmm7

+ movdqu xmm7, [2*16 + input]

+ pxor xmm2, xmm7

+ movdqu xmm7, [3*16 + input]

+ pxor xmm3, xmm7

+ movdqu xmm7, [4*16 + input]

+ pxor xmm4, xmm7

+ movdqu xmm7, [5*16 + input]

+ pxor xmm5, xmm7

+ movdqu xmm7, [6*16 + input]

+ pxor xmm6, xmm7

+ movdqu [0*16 + output], xmm0

+ movdqu [1*16 + output], xmm1

+ movdqu [2*16 + output], xmm2

+ movdqu [3*16 + output], xmm3

+ movdqu [4*16 + output], xmm4

+ movdqu [5*16 + output], xmm5

+ movdqu [6*16 + output], xmm6

+ lea input, [7*16 + input]

+ lea output, [7*16 + output]

+ sub inputLen, 7*16

+ jmp loop7

+loop1:

+ cmp inputLen, 1*16

+ jb bail

+ movdqu xmm0, [esp] ; next prepared counter block

+ add esp, 16 ; consume it; at most six are consumed because inputLen is below 7*16 here

+ i = 1

+ WHILE i LT rnds

+ movdqu xmm7, [i*16 + ctx]

+ aesenc xmm0, xmm7

+ i = i+1

+ ENDM

+ movdqu xmm7, [rnds*16 + ctx]

+ aesenclast xmm0, xmm7

+ movdqu xmm7, [input]

+ pxor xmm0, xmm7

+ movdqu [output], xmm0

+ lea input, [1*16 + input]

+ lea output, [1*16 + output]

+ sub inputLen, 1*16

+ jmp loop1

+bail:

+ mov ctrCtx, [ebp + 4*5 + 0*4] ; reload the first argument through the saved frame

+ movdqu xmm0, [esp] ; first unused prepared counter block

+ movdqu xmm1, [ctx + 0*16]

+ pxor xmm0, xmm1 ; undo the round key 0 XOR to recover the plain counter

+ movdqu [8+ctrCtx], xmm0 ; store it back as the counter for the next call

+ xor eax, eax ; return value 0

+ mov esp, ebp

+ pop ebp

+ pop CTR

+ pop ctrCtx

+ pop inputLen

+ ret

+ENDM

+; CTR entry points. Each body is generated by gen_aes_ctr_func with

+; rnds = 10, 12 or 14.

+ALIGN 16

+intel_aes_encrypt_ctr_128 PROC

+gen_aes_ctr_func 10

+intel_aes_encrypt_ctr_128 ENDP

+ALIGN 16

+intel_aes_encrypt_ctr_192 PROC

+gen_aes_ctr_func 12

+intel_aes_encrypt_ctr_192 ENDP

+ALIGN 16

+intel_aes_encrypt_ctr_256 PROC

+gen_aes_ctr_func 14

+intel_aes_encrypt_ctr_256 ENDP

+END

« no previous file with comments | « nss/lib/freebl/intel-aes-x64-masm.asm ('k') | nss/lib/freebl/intel-gcm.h » ('j') | no next file with comments »