| Index: nss/lib/freebl/intel-aes-x86-masm.asm
|
| diff --git a/nss/lib/freebl/intel-aes-x86-masm.asm b/nss/lib/freebl/intel-aes-x86-masm.asm
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..7d805e7660f15d20f89911424dc83dbb7d906dca
|
| --- /dev/null
|
| +++ b/nss/lib/freebl/intel-aes-x86-masm.asm
|
| @@ -0,0 +1,949 @@
|
| +; LICENSE:
|
| +; This submission to NSS is to be made available under the terms of the
|
| +; Mozilla Public License, v. 2.0. You can obtain a copy at
|
| +; http://mozilla.org/MPL/2.0/.
|
| +;###############################################################################
|
| +; Copyright(c) 2014, Intel Corp.
|
| +; Developers and authors:
|
| +; Shay Gueron and Vlad Krasnov
|
| +; Intel Corporation, Israel Development Centre, Haifa, Israel
|
| +; Please send feedback directly to crypto.feedback.alias@intel.com
|
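| +;
|
| +; AES-NI (AESENC/AESDEC/AESKEYGEN-style) ECB, CBC and CTR bulk-cipher
|
| +; routines plus key-expansion helpers for 32-bit x86, in MASM (ml) syntax.
|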
| +
|
| +
|
| +.MODEL FLAT, C
|
| +.XMM
|
| +
|
| +.DATA
|
| +ALIGN 16
|
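| +; pshufb byte-shuffle masks used by the key-expansion code (128/192/256-bit
|
| +; variants) and the AES round constants (rcon) 01h and 1bh in every lane.
|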
| +Lmask dd 0c0f0e0dh,0c0f0e0dh,0c0f0e0dh,0c0f0e0dh
|
| +Lmask192 dd 004070605h, 004070605h, 004070605h, 004070605h
|
| +Lmask256 dd 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh
|
| +Lcon1 dd 1,1,1,1
|
| +Lcon2 dd 1bh,1bh,1bh,1bh
|
| +
|
| +.CODE
|
| +
|
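| +; Register aliases; arguments arrive on the stack (cdecl), these are just
|
| +; the working registers used by the generated routines.
|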
| +ctx textequ <ecx>
|
| +output textequ <edx>
|
| +input textequ <eax>
|
| +inputLen textequ <edi>
|
| +
|
| +
|
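| +; One AES encryption round over the seven blocks in xmm0-xmm6,
|
| +; using round key i (xmm7 is the round-key scratch register).
|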
| +aes_rnd MACRO i
|
| + movdqu xmm7, [i*16 + ctx]
|
| + aesenc xmm0, xmm7
|
| + aesenc xmm1, xmm7
|
| + aesenc xmm2, xmm7
|
| + aesenc xmm3, xmm7
|
| + aesenc xmm4, xmm7
|
| + aesenc xmm5, xmm7
|
| + aesenc xmm6, xmm7
|
| + ENDM
|
| +
|
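| +; Final AES encryption round over xmm0-xmm6 with round key i.
|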
| +aes_last_rnd MACRO i
|
| + movdqu xmm7, [i*16 + ctx]
|
| + aesenclast xmm0, xmm7
|
| + aesenclast xmm1, xmm7
|
| + aesenclast xmm2, xmm7
|
| + aesenclast xmm3, xmm7
|
| + aesenclast xmm4, xmm7
|
| + aesenclast xmm5, xmm7
|
| + aesenclast xmm6, xmm7
|
| + ENDM
|
| +
|
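| +; One AES decryption round over xmm0-xmm6 with round key i.
|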
| +aes_dec_rnd MACRO i
|
| + movdqu xmm7, [i*16 + ctx]
|
| + aesdec xmm0, xmm7
|
| + aesdec xmm1, xmm7
|
| + aesdec xmm2, xmm7
|
| + aesdec xmm3, xmm7
|
| + aesdec xmm4, xmm7
|
| + aesdec xmm5, xmm7
|
| + aesdec xmm6, xmm7
|
| + ENDM
|
| +
|
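| +; Final AES decryption round over xmm0-xmm6 with round key i.
|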
| +aes_dec_last_rnd MACRO i
|
| + movdqu xmm7, [i*16 + ctx]
|
| + aesdeclast xmm0, xmm7
|
| + aesdeclast xmm1, xmm7
|
| + aesdeclast xmm2, xmm7
|
| + aesdeclast xmm3, xmm7
|
| + aesdeclast xmm4, xmm7
|
| + aesdeclast xmm5, xmm7
|
| + aesdeclast xmm6, xmm7
|
| + ENDM
|
| +
|
| +
|
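| +; Emit the body of an ECB encrypt (enc=1) or decrypt (enc=0) routine for a
|
| +; key schedule with rnds rounds (10/12/14).  Seven blocks are processed per
|
| +; main-loop iteration, the tail one block at a time; the routine returns 0.
|
| +; The expanded key schedule is assumed to start 44 bytes into the context.
|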
| +gen_aes_ecb_func MACRO enc, rnds
|
| +
|
| +LOCAL loop7
|
| +LOCAL loop1
|
| +LOCAL bail
|
| +
|
| + push inputLen
|
| +
|
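| +; esp + 2*4 skips the saved inputLen and the return address; argument
|
| +; slots 2 and 3 (presumably outputLen/maxOutputLen) are not used here.
|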
| + mov ctx, [esp + 2*4 + 0*4]
|
| + mov output, [esp + 2*4 + 1*4]
|
| + mov input, [esp + 2*4 + 4*4]
|
| + mov inputLen, [esp + 2*4 + 5*4]
|
| +
|
| + lea ctx, [44+ctx]
|
| +
|
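| +; Main loop: seven blocks at a time to keep the AES pipeline full.
|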
| +loop7:
|
| + cmp inputLen, 7*16
|
| + jb loop1
|
| +
|
| + movdqu xmm0, [0*16 + input]
|
| + movdqu xmm1, [1*16 + input]
|
| + movdqu xmm2, [2*16 + input]
|
| + movdqu xmm3, [3*16 + input]
|
| + movdqu xmm4, [4*16 + input]
|
| + movdqu xmm5, [5*16 + input]
|
| + movdqu xmm6, [6*16 + input]
|
| +
|
| + movdqu xmm7, [0*16 + ctx]
|
| + pxor xmm0, xmm7
|
| + pxor xmm1, xmm7
|
| + pxor xmm2, xmm7
|
| + pxor xmm3, xmm7
|
| + pxor xmm4, xmm7
|
| + pxor xmm5, xmm7
|
| + pxor xmm6, xmm7
|
| +
|
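| +; Select the encrypt or decrypt round macros at macro-expansion time.
|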
| +IF enc eq 1
|
| + rnd textequ <aes_rnd>
|
| + lastrnd textequ <aes_last_rnd>
|
| + aesinst textequ <aesenc>
|
| + aeslastinst textequ <aesenclast>
|
| +ELSE
|
| + rnd textequ <aes_dec_rnd>
|
| + lastrnd textequ <aes_dec_last_rnd>
|
| + aesinst textequ <aesdec>
|
| + aeslastinst textequ <aesdeclast>
|
| +ENDIF
|
| +
|
| + i = 1
|
| + WHILE i LT rnds
|
| + rnd i
|
| + i = i+1
|
| + ENDM
|
| + lastrnd rnds
|
| +
|
| + movdqu [0*16 + output], xmm0
|
| + movdqu [1*16 + output], xmm1
|
| + movdqu [2*16 + output], xmm2
|
| + movdqu [3*16 + output], xmm3
|
| + movdqu [4*16 + output], xmm4
|
| + movdqu [5*16 + output], xmm5
|
| + movdqu [6*16 + output], xmm6
|
| +
|
| + lea input, [7*16 + input]
|
| + lea output, [7*16 + output]
|
| + sub inputLen, 7*16
|
| + jmp loop7
|
| +
|
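| +; Tail loop: remaining blocks, one at a time.
|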
| +loop1:
|
| + cmp inputLen, 1*16
|
| + jb bail
|
| +
|
| + movdqu xmm0, [input]
|
| + movdqu xmm7, [0*16 + ctx]
|
| + pxor xmm0, xmm7
|
| +
|
| + i = 1
|
| + WHILE i LT rnds
|
| + movdqu xmm7, [i*16 + ctx]
|
| + aesinst xmm0, xmm7
|
| + i = i+1
|
| + ENDM
|
| + movdqu xmm7, [rnds*16 + ctx]
|
| + aeslastinst xmm0, xmm7
|
| +
|
| + movdqu [output], xmm0
|
| +
|
| + lea input, [1*16 + input]
|
| + lea output, [1*16 + output]
|
| + sub inputLen, 1*16
|
| + jmp loop1
|
| +
|
| +bail:
|
| + xor eax, eax
|
| + pop inputLen
|
| + ret
|
| +
|
| +ENDM
|
| +
|
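| +; ECB entry points for 128/192/256-bit keys (10/12/14 rounds).
|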
| +ALIGN 16
|
| +intel_aes_encrypt_ecb_128 PROC
|
| +gen_aes_ecb_func 1, 10
|
| +intel_aes_encrypt_ecb_128 ENDP
|
| +
|
| +ALIGN 16
|
| +intel_aes_encrypt_ecb_192 PROC
|
| +gen_aes_ecb_func 1, 12
|
| +intel_aes_encrypt_ecb_192 ENDP
|
| +
|
| +ALIGN 16
|
| +intel_aes_encrypt_ecb_256 PROC
|
| +gen_aes_ecb_func 1, 14
|
| +intel_aes_encrypt_ecb_256 ENDP
|
| +
|
| +ALIGN 16
|
| +intel_aes_decrypt_ecb_128 PROC
|
| +gen_aes_ecb_func 0, 10
|
| +intel_aes_decrypt_ecb_128 ENDP
|
| +
|
| +ALIGN 16
|
| +intel_aes_decrypt_ecb_192 PROC
|
| +gen_aes_ecb_func 0, 12
|
| +intel_aes_decrypt_ecb_192 ENDP
|
| +
|
| +ALIGN 16
|
| +intel_aes_decrypt_ecb_256 PROC
|
| +gen_aes_ecb_func 0, 14
|
| +intel_aes_decrypt_ecb_256 ENDP
|
| +
|
| +
|
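| +; Register aliases for the key-expansion routines: KEY = user key,
|
| +; KS = output key schedule, ITR = scratch / loop counter.
|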
| +KEY textequ <ecx>
|
| +KS textequ <edx>
|
| +ITR textequ <eax>
|
| +
|
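| +; AES-128 key expansion: SubWord/RotWord are computed with pshufb +
|
| +; aesenclast, and the round constant in xmm0 is doubled with pslld each
|
| +; step; eight round keys come from the loop, the last two use rcon 1bh/36h.
|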
| +ALIGN 16
|
| +intel_aes_encrypt_init_128 PROC
|
| +
|
| + mov KEY, [esp + 1*4 + 0*4]
|
| + mov KS, [esp + 1*4 + 1*4]
|
| +
|
| +
|
| + movdqu xmm1, [KEY]
|
| + movdqu [KS], xmm1
|
| + movdqa xmm2, xmm1
|
| +
|
| + lea ITR, Lcon1
|
| + movdqa xmm0, [ITR]
|
| + lea ITR, Lmask
|
| + movdqa xmm4, [ITR]
|
| +
|
| + mov ITR, 8
|
| +
|
| +Lenc_128_ks_loop:
|
| + lea KS, [16 + KS]
|
| + dec ITR
|
| +
|
| + pshufb xmm2, xmm4
|
| + aesenclast xmm2, xmm0
|
| + pslld xmm0, 1
|
| + movdqa xmm3, xmm1
|
| + pslldq xmm3, 4
|
| + pxor xmm1, xmm3
|
| + pslldq xmm3, 4
|
| + pxor xmm1, xmm3
|
| + pslldq xmm3, 4
|
| + pxor xmm1, xmm3
|
| + pxor xmm1, xmm2
|
| + movdqu [KS], xmm1
|
| + movdqa xmm2, xmm1
|
| +
|
| + jne Lenc_128_ks_loop
|
| +
|
| + lea ITR, Lcon2
|
| + movdqa xmm0, [ITR]
|
| +
|
| + pshufb xmm2, xmm4
|
| + aesenclast xmm2, xmm0
|
| + pslld xmm0, 1
|
| + movdqa xmm3, xmm1
|
| + pslldq xmm3, 4
|
| + pxor xmm1, xmm3
|
| + pslldq xmm3, 4
|
| + pxor xmm1, xmm3
|
| + pslldq xmm3, 4
|
| + pxor xmm1, xmm3
|
| + pxor xmm1, xmm2
|
| + movdqu [16 + KS], xmm1
|
| + movdqa xmm2, xmm1
|
| +
|
| + pshufb xmm2, xmm4
|
| + aesenclast xmm2, xmm0
|
| + movdqa xmm3, xmm1
|
| + pslldq xmm3, 4
|
| + pxor xmm1, xmm3
|
| + pslldq xmm3, 4
|
| + pxor xmm1, xmm3
|
| + pslldq xmm3, 4
|
| + pxor xmm1, xmm3
|
| + pxor xmm1, xmm2
|
| + movdqu [32 + KS], xmm1
|
| + movdqa xmm2, xmm1
|
| +
|
| + ret
|
| +intel_aes_encrypt_init_128 ENDP
|
| +
|
| +
|
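| +; Decryption key schedule: run the encryption expansion, then reverse the
|
| +; order of the round keys and apply aesimc (InvMixColumns) to keys 1..9.
|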
| +ALIGN 16
|
| +intel_aes_decrypt_init_128 PROC
|
| +
|
| + mov KEY, [esp + 1*4 + 0*4]
|
| + mov KS, [esp + 1*4 + 1*4]
|
| +
|
| + push KS
|
| + push KEY
|
| +
|
| + call intel_aes_encrypt_init_128
|
| +
|
| + pop KEY
|
| + pop KS
|
| +
|
| + movdqu xmm0, [0*16 + KS]
|
| + movdqu xmm1, [10*16 + KS]
|
| + movdqu [10*16 + KS], xmm0
|
| + movdqu [0*16 + KS], xmm1
|
| +
|
| + i = 1
|
| + WHILE i LT 5
|
| + movdqu xmm0, [i*16 + KS]
|
| + movdqu xmm1, [(10-i)*16 + KS]
|
| +
|
| + aesimc xmm0, xmm0
|
| + aesimc xmm1, xmm1
|
| +
|
| + movdqu [(10-i)*16 + KS], xmm0
|
| + movdqu [i*16 + KS], xmm1
|
| +
|
| + i = i+1
|
| + ENDM
|
| +
|
| + movdqu xmm0, [5*16 + KS]
|
| + aesimc xmm0, xmm0
|
| + movdqu [5*16 + KS], xmm0
|
| + ret
|
| +intel_aes_decrypt_init_128 ENDP
|
| +
|
| +
|
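| +; AES-192 key expansion: the 192-bit key state is carried in xmm1 (low 128
|
| +; bits) and xmm3 (upper 64 bits); each loop iteration performs two schedule
|
| +; steps and stores three 128-bit round keys, repacked with shufpd.
|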
| +ALIGN 16
|
| +intel_aes_encrypt_init_192 PROC
|
| +
|
| + mov KEY, [esp + 1*4 + 0*4]
|
| + mov KS, [esp + 1*4 + 1*4]
|
| +
|
| + pxor xmm3, xmm3
|
| + movdqu xmm1, [KEY]
|
| + pinsrd xmm3, DWORD PTR [16 + KEY], 0
|
| + pinsrd xmm3, DWORD PTR [20 + KEY], 1
|
| +
|
| + movdqu [KS], xmm1
|
| + movdqa xmm5, xmm3
|
| +
|
| + lea ITR, Lcon1
|
| + movdqu xmm0, [ITR]
|
| + lea ITR, Lmask192
|
| + movdqu xmm4, [ITR]
|
| +
|
| + mov ITR, 4
|
| +
|
| +Lenc_192_ks_loop:
|
| + movdqa xmm2, xmm3
|
| + pshufb xmm2, xmm4
|
| + aesenclast xmm2, xmm0
|
| + pslld xmm0, 1
|
| +
|
| + movdqa xmm6, xmm1
|
| + movdqa xmm7, xmm3
|
| + pslldq xmm6, 4
|
| + pslldq xmm7, 4
|
| + pxor xmm1, xmm6
|
| + pxor xmm3, xmm7
|
| + pslldq xmm6, 4
|
| + pxor xmm1, xmm6
|
| + pslldq xmm6, 4
|
| + pxor xmm1, xmm6
|
| + pxor xmm1, xmm2
|
| + pshufd xmm2, xmm1, 0ffh
|
| + pxor xmm3, xmm2
|
| +
|
| + movdqa xmm6, xmm1
|
| + shufpd xmm5, xmm1, 00h
|
| + shufpd xmm6, xmm3, 01h
|
| +
|
| + movdqu [16 + KS], xmm5
|
| + movdqu [32 + KS], xmm6
|
| +
|
| + movdqa xmm2, xmm3
|
| + pshufb xmm2, xmm4
|
| + aesenclast xmm2, xmm0
|
| + pslld xmm0, 1
|
| +
|
| + movdqa xmm6, xmm1
|
| + movdqa xmm7, xmm3
|
| + pslldq xmm6, 4
|
| + pslldq xmm7, 4
|
| + pxor xmm1, xmm6
|
| + pxor xmm3, xmm7
|
| + pslldq xmm6, 4
|
| + pxor xmm1, xmm6
|
| + pslldq xmm6, 4
|
| + pxor xmm1, xmm6
|
| + pxor xmm1, xmm2
|
| + pshufd xmm2, xmm1, 0ffh
|
| + pxor xmm3, xmm2
|
| +
|
| + movdqu [48 + KS], xmm1
|
| + movdqa xmm5, xmm3
|
| +
|
| + lea KS, [48 + KS]
|
| +
|
| + dec ITR
|
| + jnz Lenc_192_ks_loop
|
| +
|
| + movdqu [16 + KS], xmm5
|
| +        ret
|
| +intel_aes_encrypt_init_192 ENDP
|
| +
|
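| +; As for AES-128: expand, then reverse the round-key order and apply
|
| +; aesimc to the inner round keys (1..11).
|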
| +ALIGN 16
|
| +intel_aes_decrypt_init_192 PROC
|
| + mov KEY, [esp + 1*4 + 0*4]
|
| + mov KS, [esp + 1*4 + 1*4]
|
| +
|
| + push KS
|
| + push KEY
|
| +
|
| + call intel_aes_encrypt_init_192
|
| +
|
| + pop KEY
|
| + pop KS
|
| +
|
| + movdqu xmm0, [0*16 + KS]
|
| + movdqu xmm1, [12*16 + KS]
|
| + movdqu [12*16 + KS], xmm0
|
| + movdqu [0*16 + KS], xmm1
|
| +
|
| + i = 1
|
| + WHILE i LT 6
|
| + movdqu xmm0, [i*16 + KS]
|
| + movdqu xmm1, [(12-i)*16 + KS]
|
| +
|
| + aesimc xmm0, xmm0
|
| + aesimc xmm1, xmm1
|
| +
|
| + movdqu [(12-i)*16 + KS], xmm0
|
| + movdqu [i*16 + KS], xmm1
|
| +
|
| + i = i+1
|
| + ENDM
|
| +
|
| + movdqu xmm0, [6*16 + KS]
|
| + aesimc xmm0, xmm0
|
| + movdqu [6*16 + KS], xmm0
|
| + ret
|
| +intel_aes_decrypt_init_192 ENDP
|
| +
|
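| +; AES-256 key expansion: even-numbered round keys are derived from xmm1
|
| +; with the rcon step, odd-numbered ones from xmm3 with SubWord only
|
| +; (aesenclast against the zero key in xmm6).
|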
| +ALIGN 16
|
| +intel_aes_encrypt_init_256 PROC
|
| +
|
| + mov KEY, [esp + 1*4 + 0*4]
|
| + mov KS, [esp + 1*4 + 1*4]
|
| + movdqu xmm1, [16*0 + KEY]
|
| + movdqu xmm3, [16*1 + KEY]
|
| +
|
| + movdqu [16*0 + KS], xmm1
|
| + movdqu [16*1 + KS], xmm3
|
| +
|
| + lea ITR, Lcon1
|
| + movdqu xmm0, [ITR]
|
| + lea ITR, Lmask256
|
| + movdqu xmm5, [ITR]
|
| +
|
| + pxor xmm6, xmm6
|
| +
|
| + mov ITR, 6
|
| +
|
| +Lenc_256_ks_loop:
|
| +
|
| + movdqa xmm2, xmm3
|
| + pshufb xmm2, xmm5
|
| + aesenclast xmm2, xmm0
|
| + pslld xmm0, 1
|
| + movdqa xmm4, xmm1
|
| + pslldq xmm4, 4
|
| + pxor xmm1, xmm4
|
| + pslldq xmm4, 4
|
| + pxor xmm1, xmm4
|
| + pslldq xmm4, 4
|
| + pxor xmm1, xmm4
|
| + pxor xmm1, xmm2
|
| + movdqu [16*2 + KS], xmm1
|
| +
|
| + pshufd xmm2, xmm1, 0ffh
|
| + aesenclast xmm2, xmm6
|
| + movdqa xmm4, xmm3
|
| + pslldq xmm4, 4
|
| + pxor xmm3, xmm4
|
| + pslldq xmm4, 4
|
| + pxor xmm3, xmm4
|
| + pslldq xmm4, 4
|
| + pxor xmm3, xmm4
|
| + pxor xmm3, xmm2
|
| + movdqu [16*3 + KS], xmm3
|
| +
|
| + lea KS, [32 + KS]
|
| + dec ITR
|
| + jnz Lenc_256_ks_loop
|
| +
|
| + movdqa xmm2, xmm3
|
| + pshufb xmm2, xmm5
|
| + aesenclast xmm2, xmm0
|
| + movdqa xmm4, xmm1
|
| + pslldq xmm4, 4
|
| + pxor xmm1, xmm4
|
| + pslldq xmm4, 4
|
| + pxor xmm1, xmm4
|
| + pslldq xmm4, 4
|
| + pxor xmm1, xmm4
|
| + pxor xmm1, xmm2
|
| + movdqu [16*2 + KS], xmm1
|
| +
|
| + ret
|
| +intel_aes_encrypt_init_256 ENDP
|
| +
|
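| +; Expand, then reverse the round-key order and aesimc keys 1..13.
|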
| +ALIGN 16
|
| +intel_aes_decrypt_init_256 PROC
|
| + mov KEY, [esp + 1*4 + 0*4]
|
| + mov KS, [esp + 1*4 + 1*4]
|
| +
|
| + push KS
|
| + push KEY
|
| +
|
| + call intel_aes_encrypt_init_256
|
| +
|
| + pop KEY
|
| + pop KS
|
| +
|
| + movdqu xmm0, [0*16 + KS]
|
| + movdqu xmm1, [14*16 + KS]
|
| + movdqu [14*16 + KS], xmm0
|
| + movdqu [0*16 + KS], xmm1
|
| +
|
| + i = 1
|
| + WHILE i LT 7
|
| + movdqu xmm0, [i*16 + KS]
|
| + movdqu xmm1, [(14-i)*16 + KS]
|
| +
|
| + aesimc xmm0, xmm0
|
| + aesimc xmm1, xmm1
|
| +
|
| + movdqu [(14-i)*16 + KS], xmm0
|
| + movdqu [i*16 + KS], xmm1
|
| +
|
| + i = i+1
|
| + ENDM
|
| +
|
| + movdqu xmm0, [7*16 + KS]
|
| + aesimc xmm0, xmm0
|
| + movdqu [7*16 + KS], xmm0
|
| + ret
|
| +intel_aes_decrypt_init_256 ENDP
|
| +
|
| +
|
| +
|
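| +; CBC encryption is serial, so blocks are handled one at a time.  The
|
| +; chaining value (IV, then the previous ciphertext block) is kept 32 bytes
|
| +; below the key schedule; round keys 0-4 stay cached in xmm2-xmm6.
|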
| +gen_aes_cbc_enc_func MACRO rnds
|
| +
|
| +LOCAL loop1
|
| +LOCAL bail
|
| +
|
| + push inputLen
|
| +
|
| + mov ctx, [esp + 2*4 + 0*4]
|
| + mov output, [esp + 2*4 + 1*4]
|
| + mov input, [esp + 2*4 + 4*4]
|
| + mov inputLen, [esp + 2*4 + 5*4]
|
| +
|
| + lea ctx, [44+ctx]
|
| +
|
| + movdqu xmm0, [-32+ctx]
|
| +
|
| + movdqu xmm2, [0*16 + ctx]
|
| + movdqu xmm3, [1*16 + ctx]
|
| + movdqu xmm4, [2*16 + ctx]
|
| + movdqu xmm5, [3*16 + ctx]
|
| + movdqu xmm6, [4*16 + ctx]
|
| +
|
| +loop1:
|
| + cmp inputLen, 1*16
|
| + jb bail
|
| +
|
| + movdqu xmm1, [input]
|
| + pxor xmm1, xmm2
|
| + pxor xmm0, xmm1
|
| +
|
| + aesenc xmm0, xmm3
|
| + aesenc xmm0, xmm4
|
| + aesenc xmm0, xmm5
|
| + aesenc xmm0, xmm6
|
| +
|
| + i = 5
|
| + WHILE i LT rnds
|
| + movdqu xmm7, [i*16 + ctx]
|
| + aesenc xmm0, xmm7
|
| + i = i+1
|
| + ENDM
|
| + movdqu xmm7, [rnds*16 + ctx]
|
| + aesenclast xmm0, xmm7
|
| +
|
| + movdqu [output], xmm0
|
| +
|
| + lea input, [1*16 + input]
|
| + lea output, [1*16 + output]
|
| + sub inputLen, 1*16
|
| + jmp loop1
|
| +
|
| +bail:
|
| + movdqu [-32+ctx], xmm0
|
| +
|
| + xor eax, eax
|
| + pop inputLen
|
| + ret
|
| +
|
| +ENDM
|
| +
|
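| +; CBC decryption parallelises: seven blocks are decrypted at once and then
|
| +; XORed with the IV / preceding ciphertext blocks; the last ciphertext
|
| +; block of each batch is saved back at ctx-32 as the next chaining value.
|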
| +gen_aes_cbc_dec_func MACRO rnds
|
| +
|
| +LOCAL loop7
|
| +LOCAL loop1
|
| +LOCAL dec1
|
| +LOCAL bail
|
| +
|
| + push inputLen
|
| +
|
| + mov ctx, [esp + 2*4 + 0*4]
|
| + mov output, [esp + 2*4 + 1*4]
|
| + mov input, [esp + 2*4 + 4*4]
|
| + mov inputLen, [esp + 2*4 + 5*4]
|
| +
|
| + lea ctx, [44+ctx]
|
| +
|
| +loop7:
|
| + cmp inputLen, 7*16
|
| + jb dec1
|
| +
|
| + movdqu xmm0, [0*16 + input]
|
| + movdqu xmm1, [1*16 + input]
|
| + movdqu xmm2, [2*16 + input]
|
| + movdqu xmm3, [3*16 + input]
|
| + movdqu xmm4, [4*16 + input]
|
| + movdqu xmm5, [5*16 + input]
|
| + movdqu xmm6, [6*16 + input]
|
| +
|
| + movdqu xmm7, [0*16 + ctx]
|
| + pxor xmm0, xmm7
|
| + pxor xmm1, xmm7
|
| + pxor xmm2, xmm7
|
| + pxor xmm3, xmm7
|
| + pxor xmm4, xmm7
|
| + pxor xmm5, xmm7
|
| + pxor xmm6, xmm7
|
| +
|
| + i = 1
|
| + WHILE i LT rnds
|
| + aes_dec_rnd i
|
| + i = i+1
|
| + ENDM
|
| + aes_dec_last_rnd rnds
|
| +
|
| + movdqu xmm7, [-32 + ctx]
|
| + pxor xmm0, xmm7
|
| + movdqu xmm7, [0*16 + input]
|
| + pxor xmm1, xmm7
|
| + movdqu xmm7, [1*16 + input]
|
| + pxor xmm2, xmm7
|
| + movdqu xmm7, [2*16 + input]
|
| + pxor xmm3, xmm7
|
| + movdqu xmm7, [3*16 + input]
|
| + pxor xmm4, xmm7
|
| + movdqu xmm7, [4*16 + input]
|
| + pxor xmm5, xmm7
|
| + movdqu xmm7, [5*16 + input]
|
| + pxor xmm6, xmm7
|
| + movdqu xmm7, [6*16 + input]
|
| +
|
| + movdqu [0*16 + output], xmm0
|
| + movdqu [1*16 + output], xmm1
|
| + movdqu [2*16 + output], xmm2
|
| + movdqu [3*16 + output], xmm3
|
| + movdqu [4*16 + output], xmm4
|
| + movdqu [5*16 + output], xmm5
|
| + movdqu [6*16 + output], xmm6
|
| + movdqu [-32 + ctx], xmm7
|
| +
|
| + lea input, [7*16 + input]
|
| + lea output, [7*16 + output]
|
| + sub inputLen, 7*16
|
| + jmp loop7
|
| +dec1:
|
| +
|
| + movdqu xmm3, [-32 + ctx]
|
| +
|
| +loop1:
|
| + cmp inputLen, 1*16
|
| + jb bail
|
| +
|
| + movdqu xmm0, [input]
|
| + movdqa xmm4, xmm0
|
| + movdqu xmm7, [0*16 + ctx]
|
| + pxor xmm0, xmm7
|
| +
|
| + i = 1
|
| + WHILE i LT rnds
|
| + movdqu xmm7, [i*16 + ctx]
|
| + aesdec xmm0, xmm7
|
| + i = i+1
|
| + ENDM
|
| + movdqu xmm7, [rnds*16 + ctx]
|
| + aesdeclast xmm0, xmm7
|
| + pxor xmm3, xmm0
|
| +
|
| + movdqu [output], xmm3
|
| + movdqa xmm3, xmm4
|
| +
|
| + lea input, [1*16 + input]
|
| + lea output, [1*16 + output]
|
| + sub inputLen, 1*16
|
| + jmp loop1
|
| +
|
| +bail:
|
| + movdqu [-32 + ctx], xmm3
|
| + xor eax, eax
|
| + pop inputLen
|
| + ret
|
| +ENDM
|
| +
|
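| +; CBC entry points for 128/192/256-bit keys.
|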
| +ALIGN 16
|
| +intel_aes_encrypt_cbc_128 PROC
|
| +gen_aes_cbc_enc_func 10
|
| +intel_aes_encrypt_cbc_128 ENDP
|
| +
|
| +ALIGN 16
|
| +intel_aes_encrypt_cbc_192 PROC
|
| +gen_aes_cbc_enc_func 12
|
| +intel_aes_encrypt_cbc_192 ENDP
|
| +
|
| +ALIGN 16
|
| +intel_aes_encrypt_cbc_256 PROC
|
| +gen_aes_cbc_enc_func 14
|
| +intel_aes_encrypt_cbc_256 ENDP
|
| +
|
| +ALIGN 16
|
| +intel_aes_decrypt_cbc_128 PROC
|
| +gen_aes_cbc_dec_func 10
|
| +intel_aes_decrypt_cbc_128 ENDP
|
| +
|
| +ALIGN 16
|
| +intel_aes_decrypt_cbc_192 PROC
|
| +gen_aes_cbc_dec_func 12
|
| +intel_aes_decrypt_cbc_192 ENDP
|
| +
|
| +ALIGN 16
|
| +intel_aes_decrypt_cbc_256 PROC
|
| +gen_aes_cbc_dec_func 14
|
| +intel_aes_decrypt_cbc_256 ENDP
|
| +
|
| +
|
| +
|
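| +; Extra register aliases for CTR mode: ctrCtx = CTR context pointer,
|
| +; CTR = scratch for the 32-bit big-endian counter word.
|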
| +ctrCtx textequ <esi>
|
| +CTR textequ <ebx>
|
| +
|
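| +; CTR mode: the AES context pointer is read from offset 4 and the 16-byte
|
| +; counter block from offset 8 of the CTR context (assumed layout).  Seven
|
| +; counter blocks, pre-whitened with round key 0, are kept in an aligned
|
| +; stack buffer; only the low 32 bits of the counter are incremented.
|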
| +gen_aes_ctr_func MACRO rnds
|
| +
|
| +LOCAL loop7
|
| +LOCAL loop1
|
| +LOCAL enc1
|
| +LOCAL bail
|
| +
|
| + push inputLen
|
| + push ctrCtx
|
| + push CTR
|
| + push ebp
|
| +
|
| + mov ctrCtx, [esp + 4*5 + 0*4]
|
| + mov output, [esp + 4*5 + 1*4]
|
| + mov input, [esp + 4*5 + 4*4]
|
| + mov inputLen, [esp + 4*5 + 5*4]
|
| +
|
| + mov ctx, [4+ctrCtx]
|
| + lea ctx, [44+ctx]
|
| +
|
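| +; Reserve an aligned 7*16-byte stack area for the counter blocks;
|
| +; ebp keeps the original esp so the arguments stay reachable.
|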
| + mov ebp, esp
|
| + sub esp, 7*16
|
| + and esp, -16
|
| +
|
| + movdqu xmm0, [8+ctrCtx]
|
| + mov ctrCtx, [ctrCtx + 8 + 3*4]
|
| + bswap ctrCtx
|
| + movdqu xmm1, [ctx + 0*16]
|
| +
|
| + pxor xmm0, xmm1
|
| +
|
| + movdqa [esp + 0*16], xmm0
|
| + movdqa [esp + 1*16], xmm0
|
| + movdqa [esp + 2*16], xmm0
|
| + movdqa [esp + 3*16], xmm0
|
| + movdqa [esp + 4*16], xmm0
|
| + movdqa [esp + 5*16], xmm0
|
| + movdqa [esp + 6*16], xmm0
|
| +
|
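| +; Fill slots 1-6 with the next counters: increment, byte-swap to
|
| +; big-endian, and XOR with word 3 of round key 0 (the stored blocks
|
| +; are already whitened with round key 0).
|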
| + inc ctrCtx
|
| + mov CTR, ctrCtx
|
| + bswap CTR
|
| + xor CTR, [ctx + 3*4]
|
| + mov [esp + 1*16 + 3*4], CTR
|
| +
|
| + inc ctrCtx
|
| + mov CTR, ctrCtx
|
| + bswap CTR
|
| + xor CTR, [ctx + 3*4]
|
| + mov [esp + 2*16 + 3*4], CTR
|
| +
|
| + inc ctrCtx
|
| + mov CTR, ctrCtx
|
| + bswap CTR
|
| + xor CTR, [ctx + 3*4]
|
| + mov [esp + 3*16 + 3*4], CTR
|
| +
|
| + inc ctrCtx
|
| + mov CTR, ctrCtx
|
| + bswap CTR
|
| + xor CTR, [ctx + 3*4]
|
| + mov [esp + 4*16 + 3*4], CTR
|
| +
|
| + inc ctrCtx
|
| + mov CTR, ctrCtx
|
| + bswap CTR
|
| + xor CTR, [ctx + 3*4]
|
| + mov [esp + 5*16 + 3*4], CTR
|
| +
|
| + inc ctrCtx
|
| + mov CTR, ctrCtx
|
| + bswap CTR
|
| + xor CTR, [ctx + 3*4]
|
| + mov [esp + 6*16 + 3*4], CTR
|
| +
|
| +
|
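| +; Encrypt seven counter blocks; during the first seven rounds the stack
|
| +; copies are refreshed with the following seven counter values.
|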
| +loop7:
|
| + cmp inputLen, 7*16
|
| + jb loop1
|
| +
|
| + movdqu xmm0, [0*16 + esp]
|
| + movdqu xmm1, [1*16 + esp]
|
| + movdqu xmm2, [2*16 + esp]
|
| + movdqu xmm3, [3*16 + esp]
|
| + movdqu xmm4, [4*16 + esp]
|
| + movdqu xmm5, [5*16 + esp]
|
| + movdqu xmm6, [6*16 + esp]
|
| +
|
| + i = 1
|
| + WHILE i LE 7
|
| + aes_rnd i
|
| +
|
| + inc ctrCtx
|
| + mov CTR, ctrCtx
|
| + bswap CTR
|
| + xor CTR, [ctx + 3*4]
|
| + mov [esp + (i-1)*16 + 3*4], CTR
|
| +
|
| + i = i+1
|
| + ENDM
|
| + WHILE i LT rnds
|
| + aes_rnd i
|
| + i = i+1
|
| + ENDM
|
| + aes_last_rnd rnds
|
| +
|
| + movdqu xmm7, [0*16 + input]
|
| + pxor xmm0, xmm7
|
| + movdqu xmm7, [1*16 + input]
|
| + pxor xmm1, xmm7
|
| + movdqu xmm7, [2*16 + input]
|
| + pxor xmm2, xmm7
|
| + movdqu xmm7, [3*16 + input]
|
| + pxor xmm3, xmm7
|
| + movdqu xmm7, [4*16 + input]
|
| + pxor xmm4, xmm7
|
| + movdqu xmm7, [5*16 + input]
|
| + pxor xmm5, xmm7
|
| + movdqu xmm7, [6*16 + input]
|
| + pxor xmm6, xmm7
|
| +
|
| + movdqu [0*16 + output], xmm0
|
| + movdqu [1*16 + output], xmm1
|
| + movdqu [2*16 + output], xmm2
|
| + movdqu [3*16 + output], xmm3
|
| + movdqu [4*16 + output], xmm4
|
| + movdqu [5*16 + output], xmm5
|
| + movdqu [6*16 + output], xmm6
|
| +
|
| + lea input, [7*16 + input]
|
| + lea output, [7*16 + output]
|
| + sub inputLen, 7*16
|
| + jmp loop7
|
| +
|
| +
|
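| +; Tail loop: one block at a time, consuming pre-computed counter blocks
|
| +; from the stack (esp is advanced past each used block).
|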
| +loop1:
|
| + cmp inputLen, 1*16
|
| + jb bail
|
| +
|
| + movdqu xmm0, [esp]
|
| + add esp, 16
|
| +
|
| + i = 1
|
| + WHILE i LT rnds
|
| + movdqu xmm7, [i*16 + ctx]
|
| + aesenc xmm0, xmm7
|
| + i = i+1
|
| + ENDM
|
| + movdqu xmm7, [rnds*16 + ctx]
|
| + aesenclast xmm0, xmm7
|
| +
|
| + movdqu xmm7, [input]
|
| + pxor xmm0, xmm7
|
| + movdqu [output], xmm0
|
| +
|
| + lea input, [1*16 + input]
|
| + lea output, [1*16 + output]
|
| + sub inputLen, 1*16
|
| + jmp loop1
|
| +
|
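| +; ctrCtx was reused as the counter, so reload the context pointer via ebp,
|
| +; then write the next counter block back, removing the round-0 whitening.
|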
| +bail:
|
| +
|
| + mov ctrCtx, [ebp + 4*5 + 0*4]
|
| + movdqu xmm0, [esp]
|
| + movdqu xmm1, [ctx + 0*16]
|
| + pxor xmm0, xmm1
|
| + movdqu [8+ctrCtx], xmm0
|
| +
|
| +
|
| + xor eax, eax
|
| + mov esp, ebp
|
| + pop ebp
|
| + pop CTR
|
| + pop ctrCtx
|
| + pop inputLen
|
| + ret
|
| +ENDM
|
| +
|
| +
|
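| +; CTR entry points for 128/192/256-bit keys.
|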
| +ALIGN 16
|
| +intel_aes_encrypt_ctr_128 PROC
|
| +gen_aes_ctr_func 10
|
| +intel_aes_encrypt_ctr_128 ENDP
|
| +
|
| +ALIGN 16
|
| +intel_aes_encrypt_ctr_192 PROC
|
| +gen_aes_ctr_func 12
|
| +intel_aes_encrypt_ctr_192 ENDP
|
| +
|
| +ALIGN 16
|
| +intel_aes_encrypt_ctr_256 PROC
|
| +gen_aes_ctr_func 14
|
| +intel_aes_encrypt_ctr_256 ENDP
|
| +
|
| +
|
| +END
|
|
|