| Index: nss/lib/freebl/intel-aes-x86-masm.asm
|
| diff --git a/nss/lib/freebl/intel-aes-x86-masm.asm b/nss/lib/freebl/intel-aes-x86-masm.asm
|
| deleted file mode 100644
|
| index 7d805e7660f15d20f89911424dc83dbb7d906dca..0000000000000000000000000000000000000000
|
| --- a/nss/lib/freebl/intel-aes-x86-masm.asm
|
| +++ /dev/null
|
| @@ -1,949 +0,0 @@
|
| -; LICENSE:
|
| -; This submission to NSS is to be made available under the terms of the
|
| -; Mozilla Public License, v. 2.0. You can obtain one at http:
|
| -; //mozilla.org/MPL/2.0/.
|
| -;###############################################################################
|
| -; Copyright(c) 2014, Intel Corp.
|
| -; Developers and authors:
|
| -; Shay Gueron and Vlad Krasnov
|
| -; Intel Corporation, Israel Development Centre, Haifa, Israel
|
| -; Please send feedback directly to crypto.feedback.alias@intel.com
|
| -
|
| -
|
| -.MODEL FLAT, C
|
| -.XMM
|
| -
|
| -.DATA
|
| -ALIGN 16
|
| -Lmask dd 0c0f0e0dh,0c0f0e0dh,0c0f0e0dh,0c0f0e0dh
|
| -Lmask192 dd 004070605h, 004070605h, 004070605h, 004070605h
|
| -Lmask256 dd 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh
|
| -Lcon1 dd 1,1,1,1
|
| -Lcon2 dd 1bh,1bh,1bh,1bh
|
| -
|
| -.CODE
|
| -
|
| -ctx textequ <ecx>
|
| -output textequ <edx>
|
| -input textequ <eax>
|
| -inputLen textequ <edi>
|
| -
|
| -
|
| -aes_rnd MACRO i
|
| - movdqu xmm7, [i*16 + ctx]
|
| - aesenc xmm0, xmm7
|
| - aesenc xmm1, xmm7
|
| - aesenc xmm2, xmm7
|
| - aesenc xmm3, xmm7
|
| - aesenc xmm4, xmm7
|
| - aesenc xmm5, xmm7
|
| - aesenc xmm6, xmm7
|
| - ENDM
|
| -
|
| -aes_last_rnd MACRO i
|
| - movdqu xmm7, [i*16 + ctx]
|
| - aesenclast xmm0, xmm7
|
| - aesenclast xmm1, xmm7
|
| - aesenclast xmm2, xmm7
|
| - aesenclast xmm3, xmm7
|
| - aesenclast xmm4, xmm7
|
| - aesenclast xmm5, xmm7
|
| - aesenclast xmm6, xmm7
|
| - ENDM
|
| -
|
| -aes_dec_rnd MACRO i
|
| - movdqu xmm7, [i*16 + ctx]
|
| - aesdec xmm0, xmm7
|
| - aesdec xmm1, xmm7
|
| - aesdec xmm2, xmm7
|
| - aesdec xmm3, xmm7
|
| - aesdec xmm4, xmm7
|
| - aesdec xmm5, xmm7
|
| - aesdec xmm6, xmm7
|
| - ENDM
|
| -
|
| -aes_dec_last_rnd MACRO i
|
| - movdqu xmm7, [i*16 + ctx]
|
| - aesdeclast xmm0, xmm7
|
| - aesdeclast xmm1, xmm7
|
| - aesdeclast xmm2, xmm7
|
| - aesdeclast xmm3, xmm7
|
| - aesdeclast xmm4, xmm7
|
| - aesdeclast xmm5, xmm7
|
| - aesdeclast xmm6, xmm7
|
| - ENDM
|
| -
|
| -
|
| -gen_aes_ecb_func MACRO enc, rnds
|
| -
|
| -LOCAL loop7
|
| -LOCAL loop1
|
| -LOCAL bail
|
| -
|
| - push inputLen
|
| -
|
| - mov ctx, [esp + 2*4 + 0*4]
|
| - mov output, [esp + 2*4 + 1*4]
|
| - mov input, [esp + 2*4 + 4*4]
|
| - mov inputLen, [esp + 2*4 + 5*4]
|
| -
|
| - lea ctx, [44+ctx]
|
| -
|
| -loop7:
|
| - cmp inputLen, 7*16
|
| - jb loop1
|
| -
|
| - movdqu xmm0, [0*16 + input]
|
| - movdqu xmm1, [1*16 + input]
|
| - movdqu xmm2, [2*16 + input]
|
| - movdqu xmm3, [3*16 + input]
|
| - movdqu xmm4, [4*16 + input]
|
| - movdqu xmm5, [5*16 + input]
|
| - movdqu xmm6, [6*16 + input]
|
| -
|
| - movdqu xmm7, [0*16 + ctx]
|
| - pxor xmm0, xmm7
|
| - pxor xmm1, xmm7
|
| - pxor xmm2, xmm7
|
| - pxor xmm3, xmm7
|
| - pxor xmm4, xmm7
|
| - pxor xmm5, xmm7
|
| - pxor xmm6, xmm7
|
| -
|
| -IF enc eq 1
|
| - rnd textequ <aes_rnd>
|
| - lastrnd textequ <aes_last_rnd>
|
| - aesinst textequ <aesenc>
|
| - aeslastinst textequ <aesenclast>
|
| -ELSE
|
| - rnd textequ <aes_dec_rnd>
|
| - lastrnd textequ <aes_dec_last_rnd>
|
| - aesinst textequ <aesdec>
|
| - aeslastinst textequ <aesdeclast>
|
| -ENDIF
|
| -
|
| - i = 1
|
| - WHILE i LT rnds
|
| - rnd i
|
| - i = i+1
|
| - ENDM
|
| - lastrnd rnds
|
| -
|
| - movdqu [0*16 + output], xmm0
|
| - movdqu [1*16 + output], xmm1
|
| - movdqu [2*16 + output], xmm2
|
| - movdqu [3*16 + output], xmm3
|
| - movdqu [4*16 + output], xmm4
|
| - movdqu [5*16 + output], xmm5
|
| - movdqu [6*16 + output], xmm6
|
| -
|
| - lea input, [7*16 + input]
|
| - lea output, [7*16 + output]
|
| - sub inputLen, 7*16
|
| - jmp loop7
|
| -
|
| -loop1:
|
| - cmp inputLen, 1*16
|
| - jb bail
|
| -
|
| - movdqu xmm0, [input]
|
| - movdqu xmm7, [0*16 + ctx]
|
| - pxor xmm0, xmm7
|
| -
|
| - i = 1
|
| - WHILE i LT rnds
|
| - movdqu xmm7, [i*16 + ctx]
|
| - aesinst xmm0, xmm7
|
| - i = i+1
|
| - ENDM
|
| - movdqu xmm7, [rnds*16 + ctx]
|
| - aeslastinst xmm0, xmm7
|
| -
|
| - movdqu [output], xmm0
|
| -
|
| - lea input, [1*16 + input]
|
| - lea output, [1*16 + output]
|
| - sub inputLen, 1*16
|
| - jmp loop1
|
| -
|
| -bail:
|
| - xor eax, eax
|
| - pop inputLen
|
| - ret
|
| -
|
| -ENDM
|
| -
|
| -ALIGN 16
|
| -intel_aes_encrypt_ecb_128 PROC
|
| -gen_aes_ecb_func 1, 10
|
| -intel_aes_encrypt_ecb_128 ENDP
|
| -
|
| -ALIGN 16
|
| -intel_aes_encrypt_ecb_192 PROC
|
| -gen_aes_ecb_func 1, 12
|
| -intel_aes_encrypt_ecb_192 ENDP
|
| -
|
| -ALIGN 16
|
| -intel_aes_encrypt_ecb_256 PROC
|
| -gen_aes_ecb_func 1, 14
|
| -intel_aes_encrypt_ecb_256 ENDP
|
| -
|
| -ALIGN 16
|
| -intel_aes_decrypt_ecb_128 PROC
|
| -gen_aes_ecb_func 0, 10
|
| -intel_aes_decrypt_ecb_128 ENDP
|
| -
|
| -ALIGN 16
|
| -intel_aes_decrypt_ecb_192 PROC
|
| -gen_aes_ecb_func 0, 12
|
| -intel_aes_decrypt_ecb_192 ENDP
|
| -
|
| -ALIGN 16
|
| -intel_aes_decrypt_ecb_256 PROC
|
| -gen_aes_ecb_func 0, 14
|
| -intel_aes_decrypt_ecb_256 ENDP
|
| -
|
| -
|
| -KEY textequ <ecx>
|
| -KS textequ <edx>
|
| -ITR textequ <eax>
|
| -
|
| -ALIGN 16
|
| -intel_aes_encrypt_init_128 PROC
|
| -
|
| - mov KEY, [esp + 1*4 + 0*4]
|
| - mov KS, [esp + 1*4 + 1*4]
|
| -
|
| -
|
| - movdqu xmm1, [KEY]
|
| - movdqu [KS], xmm1
|
| - movdqa xmm2, xmm1
|
| -
|
| - lea ITR, Lcon1
|
| - movdqa xmm0, [ITR]
|
| - lea ITR, Lmask
|
| - movdqa xmm4, [ITR]
|
| -
|
| - mov ITR, 8
|
| -
|
| -Lenc_128_ks_loop:
|
| - lea KS, [16 + KS]
|
| - dec ITR
|
| -
|
| - pshufb xmm2, xmm4
|
| - aesenclast xmm2, xmm0
|
| - pslld xmm0, 1
|
| - movdqa xmm3, xmm1
|
| - pslldq xmm3, 4
|
| - pxor xmm1, xmm3
|
| - pslldq xmm3, 4
|
| - pxor xmm1, xmm3
|
| - pslldq xmm3, 4
|
| - pxor xmm1, xmm3
|
| - pxor xmm1, xmm2
|
| - movdqu [KS], xmm1
|
| - movdqa xmm2, xmm1
|
| -
|
| - jne Lenc_128_ks_loop
|
| -
|
| - lea ITR, Lcon2
|
| - movdqa xmm0, [ITR]
|
| -
|
| - pshufb xmm2, xmm4
|
| - aesenclast xmm2, xmm0
|
| - pslld xmm0, 1
|
| - movdqa xmm3, xmm1
|
| - pslldq xmm3, 4
|
| - pxor xmm1, xmm3
|
| - pslldq xmm3, 4
|
| - pxor xmm1, xmm3
|
| - pslldq xmm3, 4
|
| - pxor xmm1, xmm3
|
| - pxor xmm1, xmm2
|
| - movdqu [16 + KS], xmm1
|
| - movdqa xmm2, xmm1
|
| -
|
| - pshufb xmm2, xmm4
|
| - aesenclast xmm2, xmm0
|
| - movdqa xmm3, xmm1
|
| - pslldq xmm3, 4
|
| - pxor xmm1, xmm3
|
| - pslldq xmm3, 4
|
| - pxor xmm1, xmm3
|
| - pslldq xmm3, 4
|
| - pxor xmm1, xmm3
|
| - pxor xmm1, xmm2
|
| - movdqu [32 + KS], xmm1
|
| - movdqa xmm2, xmm1
|
| -
|
| - ret
|
| -intel_aes_encrypt_init_128 ENDP
|
| -
|
| -
|
| -ALIGN 16
|
| -intel_aes_decrypt_init_128 PROC
|
| -
|
| - mov KEY, [esp + 1*4 + 0*4]
|
| - mov KS, [esp + 1*4 + 1*4]
|
| -
|
| - push KS
|
| - push KEY
|
| -
|
| - call intel_aes_encrypt_init_128
|
| -
|
| - pop KEY
|
| - pop KS
|
| -
|
| - movdqu xmm0, [0*16 + KS]
|
| - movdqu xmm1, [10*16 + KS]
|
| - movdqu [10*16 + KS], xmm0
|
| - movdqu [0*16 + KS], xmm1
|
| -
|
| - i = 1
|
| - WHILE i LT 5
|
| - movdqu xmm0, [i*16 + KS]
|
| - movdqu xmm1, [(10-i)*16 + KS]
|
| -
|
| - aesimc xmm0, xmm0
|
| - aesimc xmm1, xmm1
|
| -
|
| - movdqu [(10-i)*16 + KS], xmm0
|
| - movdqu [i*16 + KS], xmm1
|
| -
|
| - i = i+1
|
| - ENDM
|
| -
|
| - movdqu xmm0, [5*16 + KS]
|
| - aesimc xmm0, xmm0
|
| - movdqu [5*16 + KS], xmm0
|
| - ret
|
| -intel_aes_decrypt_init_128 ENDP
|
| -
|
| -
|
| -ALIGN 16
|
| -intel_aes_encrypt_init_192 PROC
|
| -
|
| - mov KEY, [esp + 1*4 + 0*4]
|
| - mov KS, [esp + 1*4 + 1*4]
|
| -
|
| - pxor xmm3, xmm3
|
| - movdqu xmm1, [KEY]
|
| - pinsrd xmm3, DWORD PTR [16 + KEY], 0
|
| - pinsrd xmm3, DWORD PTR [20 + KEY], 1
|
| -
|
| - movdqu [KS], xmm1
|
| - movdqa xmm5, xmm3
|
| -
|
| - lea ITR, Lcon1
|
| - movdqu xmm0, [ITR]
|
| - lea ITR, Lmask192
|
| - movdqu xmm4, [ITR]
|
| -
|
| - mov ITR, 4
|
| -
|
| -Lenc_192_ks_loop:
|
| - movdqa xmm2, xmm3
|
| - pshufb xmm2, xmm4
|
| - aesenclast xmm2, xmm0
|
| - pslld xmm0, 1
|
| -
|
| - movdqa xmm6, xmm1
|
| - movdqa xmm7, xmm3
|
| - pslldq xmm6, 4
|
| - pslldq xmm7, 4
|
| - pxor xmm1, xmm6
|
| - pxor xmm3, xmm7
|
| - pslldq xmm6, 4
|
| - pxor xmm1, xmm6
|
| - pslldq xmm6, 4
|
| - pxor xmm1, xmm6
|
| - pxor xmm1, xmm2
|
| - pshufd xmm2, xmm1, 0ffh
|
| - pxor xmm3, xmm2
|
| -
|
| - movdqa xmm6, xmm1
|
| - shufpd xmm5, xmm1, 00h
|
| - shufpd xmm6, xmm3, 01h
|
| -
|
| - movdqu [16 + KS], xmm5
|
| - movdqu [32 + KS], xmm6
|
| -
|
| - movdqa xmm2, xmm3
|
| - pshufb xmm2, xmm4
|
| - aesenclast xmm2, xmm0
|
| - pslld xmm0, 1
|
| -
|
| - movdqa xmm6, xmm1
|
| - movdqa xmm7, xmm3
|
| - pslldq xmm6, 4
|
| - pslldq xmm7, 4
|
| - pxor xmm1, xmm6
|
| - pxor xmm3, xmm7
|
| - pslldq xmm6, 4
|
| - pxor xmm1, xmm6
|
| - pslldq xmm6, 4
|
| - pxor xmm1, xmm6
|
| - pxor xmm1, xmm2
|
| - pshufd xmm2, xmm1, 0ffh
|
| - pxor xmm3, xmm2
|
| -
|
| - movdqu [48 + KS], xmm1
|
| - movdqa xmm5, xmm3
|
| -
|
| - lea KS, [48 + KS]
|
| -
|
| - dec ITR
|
| - jnz Lenc_192_ks_loop
|
| -
|
| - movdqu [16 + KS], xmm5
|
| -ret
|
| -intel_aes_encrypt_init_192 ENDP
|
| -
|
| -ALIGN 16
|
| -intel_aes_decrypt_init_192 PROC
|
| - mov KEY, [esp + 1*4 + 0*4]
|
| - mov KS, [esp + 1*4 + 1*4]
|
| -
|
| - push KS
|
| - push KEY
|
| -
|
| - call intel_aes_encrypt_init_192
|
| -
|
| - pop KEY
|
| - pop KS
|
| -
|
| - movdqu xmm0, [0*16 + KS]
|
| - movdqu xmm1, [12*16 + KS]
|
| - movdqu [12*16 + KS], xmm0
|
| - movdqu [0*16 + KS], xmm1
|
| -
|
| - i = 1
|
| - WHILE i LT 6
|
| - movdqu xmm0, [i*16 + KS]
|
| - movdqu xmm1, [(12-i)*16 + KS]
|
| -
|
| - aesimc xmm0, xmm0
|
| - aesimc xmm1, xmm1
|
| -
|
| - movdqu [(12-i)*16 + KS], xmm0
|
| - movdqu [i*16 + KS], xmm1
|
| -
|
| - i = i+1
|
| - ENDM
|
| -
|
| - movdqu xmm0, [6*16 + KS]
|
| - aesimc xmm0, xmm0
|
| - movdqu [6*16 + KS], xmm0
|
| - ret
|
| -intel_aes_decrypt_init_192 ENDP
|
| -
|
| -ALIGN 16
|
| -intel_aes_encrypt_init_256 PROC
|
| -
|
| - mov KEY, [esp + 1*4 + 0*4]
|
| - mov KS, [esp + 1*4 + 1*4]
|
| - movdqu xmm1, [16*0 + KEY]
|
| - movdqu xmm3, [16*1 + KEY]
|
| -
|
| - movdqu [16*0 + KS], xmm1
|
| - movdqu [16*1 + KS], xmm3
|
| -
|
| - lea ITR, Lcon1
|
| - movdqu xmm0, [ITR]
|
| - lea ITR, Lmask256
|
| - movdqu xmm5, [ITR]
|
| -
|
| - pxor xmm6, xmm6
|
| -
|
| - mov ITR, 6
|
| -
|
| -Lenc_256_ks_loop:
|
| -
|
| - movdqa xmm2, xmm3
|
| - pshufb xmm2, xmm5
|
| - aesenclast xmm2, xmm0
|
| - pslld xmm0, 1
|
| - movdqa xmm4, xmm1
|
| - pslldq xmm4, 4
|
| - pxor xmm1, xmm4
|
| - pslldq xmm4, 4
|
| - pxor xmm1, xmm4
|
| - pslldq xmm4, 4
|
| - pxor xmm1, xmm4
|
| - pxor xmm1, xmm2
|
| - movdqu [16*2 + KS], xmm1
|
| -
|
| - pshufd xmm2, xmm1, 0ffh
|
| - aesenclast xmm2, xmm6
|
| - movdqa xmm4, xmm3
|
| - pslldq xmm4, 4
|
| - pxor xmm3, xmm4
|
| - pslldq xmm4, 4
|
| - pxor xmm3, xmm4
|
| - pslldq xmm4, 4
|
| - pxor xmm3, xmm4
|
| - pxor xmm3, xmm2
|
| - movdqu [16*3 + KS], xmm3
|
| -
|
| - lea KS, [32 + KS]
|
| - dec ITR
|
| - jnz Lenc_256_ks_loop
|
| -
|
| - movdqa xmm2, xmm3
|
| - pshufb xmm2, xmm5
|
| - aesenclast xmm2, xmm0
|
| - movdqa xmm4, xmm1
|
| - pslldq xmm4, 4
|
| - pxor xmm1, xmm4
|
| - pslldq xmm4, 4
|
| - pxor xmm1, xmm4
|
| - pslldq xmm4, 4
|
| - pxor xmm1, xmm4
|
| - pxor xmm1, xmm2
|
| - movdqu [16*2 + KS], xmm1
|
| -
|
| - ret
|
| -intel_aes_encrypt_init_256 ENDP
|
| -
|
| -ALIGN 16
|
| -intel_aes_decrypt_init_256 PROC
|
| - mov KEY, [esp + 1*4 + 0*4]
|
| - mov KS, [esp + 1*4 + 1*4]
|
| -
|
| - push KS
|
| - push KEY
|
| -
|
| - call intel_aes_encrypt_init_256
|
| -
|
| - pop KEY
|
| - pop KS
|
| -
|
| - movdqu xmm0, [0*16 + KS]
|
| - movdqu xmm1, [14*16 + KS]
|
| - movdqu [14*16 + KS], xmm0
|
| - movdqu [0*16 + KS], xmm1
|
| -
|
| - i = 1
|
| - WHILE i LT 7
|
| - movdqu xmm0, [i*16 + KS]
|
| - movdqu xmm1, [(14-i)*16 + KS]
|
| -
|
| - aesimc xmm0, xmm0
|
| - aesimc xmm1, xmm1
|
| -
|
| - movdqu [(14-i)*16 + KS], xmm0
|
| - movdqu [i*16 + KS], xmm1
|
| -
|
| - i = i+1
|
| - ENDM
|
| -
|
| - movdqu xmm0, [7*16 + KS]
|
| - aesimc xmm0, xmm0
|
| - movdqu [7*16 + KS], xmm0
|
| - ret
|
| -intel_aes_decrypt_init_256 ENDP
|
| -
|
| -
|
| -
|
| -gen_aes_cbc_enc_func MACRO rnds
|
| -
|
| -LOCAL loop1
|
| -LOCAL bail
|
| -
|
| - push inputLen
|
| -
|
| - mov ctx, [esp + 2*4 + 0*4]
|
| - mov output, [esp + 2*4 + 1*4]
|
| - mov input, [esp + 2*4 + 4*4]
|
| - mov inputLen, [esp + 2*4 + 5*4]
|
| -
|
| - lea ctx, [44+ctx]
|
| -
|
| - movdqu xmm0, [-32+ctx]
|
| -
|
| - movdqu xmm2, [0*16 + ctx]
|
| - movdqu xmm3, [1*16 + ctx]
|
| - movdqu xmm4, [2*16 + ctx]
|
| - movdqu xmm5, [3*16 + ctx]
|
| - movdqu xmm6, [4*16 + ctx]
|
| -
|
| -loop1:
|
| - cmp inputLen, 1*16
|
| - jb bail
|
| -
|
| - movdqu xmm1, [input]
|
| - pxor xmm1, xmm2
|
| - pxor xmm0, xmm1
|
| -
|
| - aesenc xmm0, xmm3
|
| - aesenc xmm0, xmm4
|
| - aesenc xmm0, xmm5
|
| - aesenc xmm0, xmm6
|
| -
|
| - i = 5
|
| - WHILE i LT rnds
|
| - movdqu xmm7, [i*16 + ctx]
|
| - aesenc xmm0, xmm7
|
| - i = i+1
|
| - ENDM
|
| - movdqu xmm7, [rnds*16 + ctx]
|
| - aesenclast xmm0, xmm7
|
| -
|
| - movdqu [output], xmm0
|
| -
|
| - lea input, [1*16 + input]
|
| - lea output, [1*16 + output]
|
| - sub inputLen, 1*16
|
| - jmp loop1
|
| -
|
| -bail:
|
| - movdqu [-32+ctx], xmm0
|
| -
|
| - xor eax, eax
|
| - pop inputLen
|
| - ret
|
| -
|
| -ENDM
|
| -
|
| -gen_aes_cbc_dec_func MACRO rnds
|
| -
|
| -LOCAL loop7
|
| -LOCAL loop1
|
| -LOCAL dec1
|
| -LOCAL bail
|
| -
|
| - push inputLen
|
| -
|
| - mov ctx, [esp + 2*4 + 0*4]
|
| - mov output, [esp + 2*4 + 1*4]
|
| - mov input, [esp + 2*4 + 4*4]
|
| - mov inputLen, [esp + 2*4 + 5*4]
|
| -
|
| - lea ctx, [44+ctx]
|
| -
|
| -loop7:
|
| - cmp inputLen, 7*16
|
| - jb dec1
|
| -
|
| - movdqu xmm0, [0*16 + input]
|
| - movdqu xmm1, [1*16 + input]
|
| - movdqu xmm2, [2*16 + input]
|
| - movdqu xmm3, [3*16 + input]
|
| - movdqu xmm4, [4*16 + input]
|
| - movdqu xmm5, [5*16 + input]
|
| - movdqu xmm6, [6*16 + input]
|
| -
|
| - movdqu xmm7, [0*16 + ctx]
|
| - pxor xmm0, xmm7
|
| - pxor xmm1, xmm7
|
| - pxor xmm2, xmm7
|
| - pxor xmm3, xmm7
|
| - pxor xmm4, xmm7
|
| - pxor xmm5, xmm7
|
| - pxor xmm6, xmm7
|
| -
|
| - i = 1
|
| - WHILE i LT rnds
|
| - aes_dec_rnd i
|
| - i = i+1
|
| - ENDM
|
| - aes_dec_last_rnd rnds
|
| -
|
| - movdqu xmm7, [-32 + ctx]
|
| - pxor xmm0, xmm7
|
| - movdqu xmm7, [0*16 + input]
|
| - pxor xmm1, xmm7
|
| - movdqu xmm7, [1*16 + input]
|
| - pxor xmm2, xmm7
|
| - movdqu xmm7, [2*16 + input]
|
| - pxor xmm3, xmm7
|
| - movdqu xmm7, [3*16 + input]
|
| - pxor xmm4, xmm7
|
| - movdqu xmm7, [4*16 + input]
|
| - pxor xmm5, xmm7
|
| - movdqu xmm7, [5*16 + input]
|
| - pxor xmm6, xmm7
|
| - movdqu xmm7, [6*16 + input]
|
| -
|
| - movdqu [0*16 + output], xmm0
|
| - movdqu [1*16 + output], xmm1
|
| - movdqu [2*16 + output], xmm2
|
| - movdqu [3*16 + output], xmm3
|
| - movdqu [4*16 + output], xmm4
|
| - movdqu [5*16 + output], xmm5
|
| - movdqu [6*16 + output], xmm6
|
| - movdqu [-32 + ctx], xmm7
|
| -
|
| - lea input, [7*16 + input]
|
| - lea output, [7*16 + output]
|
| - sub inputLen, 7*16
|
| - jmp loop7
|
| -dec1:
|
| -
|
| - movdqu xmm3, [-32 + ctx]
|
| -
|
| -loop1:
|
| - cmp inputLen, 1*16
|
| - jb bail
|
| -
|
| - movdqu xmm0, [input]
|
| - movdqa xmm4, xmm0
|
| - movdqu xmm7, [0*16 + ctx]
|
| - pxor xmm0, xmm7
|
| -
|
| - i = 1
|
| - WHILE i LT rnds
|
| - movdqu xmm7, [i*16 + ctx]
|
| - aesdec xmm0, xmm7
|
| - i = i+1
|
| - ENDM
|
| - movdqu xmm7, [rnds*16 + ctx]
|
| - aesdeclast xmm0, xmm7
|
| - pxor xmm3, xmm0
|
| -
|
| - movdqu [output], xmm3
|
| - movdqa xmm3, xmm4
|
| -
|
| - lea input, [1*16 + input]
|
| - lea output, [1*16 + output]
|
| - sub inputLen, 1*16
|
| - jmp loop1
|
| -
|
| -bail:
|
| - movdqu [-32 + ctx], xmm3
|
| - xor eax, eax
|
| - pop inputLen
|
| - ret
|
| -ENDM
|
| -
|
| -ALIGN 16
|
| -intel_aes_encrypt_cbc_128 PROC
|
| -gen_aes_cbc_enc_func 10
|
| -intel_aes_encrypt_cbc_128 ENDP
|
| -
|
| -ALIGN 16
|
| -intel_aes_encrypt_cbc_192 PROC
|
| -gen_aes_cbc_enc_func 12
|
| -intel_aes_encrypt_cbc_192 ENDP
|
| -
|
| -ALIGN 16
|
| -intel_aes_encrypt_cbc_256 PROC
|
| -gen_aes_cbc_enc_func 14
|
| -intel_aes_encrypt_cbc_256 ENDP
|
| -
|
| -ALIGN 16
|
| -intel_aes_decrypt_cbc_128 PROC
|
| -gen_aes_cbc_dec_func 10
|
| -intel_aes_decrypt_cbc_128 ENDP
|
| -
|
| -ALIGN 16
|
| -intel_aes_decrypt_cbc_192 PROC
|
| -gen_aes_cbc_dec_func 12
|
| -intel_aes_decrypt_cbc_192 ENDP
|
| -
|
| -ALIGN 16
|
| -intel_aes_decrypt_cbc_256 PROC
|
| -gen_aes_cbc_dec_func 14
|
| -intel_aes_decrypt_cbc_256 ENDP
|
| -
|
| -
|
| -
|
| -ctrCtx textequ <esi>
|
| -CTR textequ <ebx>
|
| -
|
| -gen_aes_ctr_func MACRO rnds
|
| -
|
| -LOCAL loop7
|
| -LOCAL loop1
|
| -LOCAL enc1
|
| -LOCAL bail
|
| -
|
| - push inputLen
|
| - push ctrCtx
|
| - push CTR
|
| - push ebp
|
| -
|
| - mov ctrCtx, [esp + 4*5 + 0*4]
|
| - mov output, [esp + 4*5 + 1*4]
|
| - mov input, [esp + 4*5 + 4*4]
|
| - mov inputLen, [esp + 4*5 + 5*4]
|
| -
|
| - mov ctx, [4+ctrCtx]
|
| - lea ctx, [44+ctx]
|
| -
|
| - mov ebp, esp
|
| - sub esp, 7*16
|
| - and esp, -16
|
| -
|
| - movdqu xmm0, [8+ctrCtx]
|
| - mov ctrCtx, [ctrCtx + 8 + 3*4]
|
| - bswap ctrCtx
|
| - movdqu xmm1, [ctx + 0*16]
|
| -
|
| - pxor xmm0, xmm1
|
| -
|
| - movdqa [esp + 0*16], xmm0
|
| - movdqa [esp + 1*16], xmm0
|
| - movdqa [esp + 2*16], xmm0
|
| - movdqa [esp + 3*16], xmm0
|
| - movdqa [esp + 4*16], xmm0
|
| - movdqa [esp + 5*16], xmm0
|
| - movdqa [esp + 6*16], xmm0
|
| -
|
| - inc ctrCtx
|
| - mov CTR, ctrCtx
|
| - bswap CTR
|
| - xor CTR, [ctx + 3*4]
|
| - mov [esp + 1*16 + 3*4], CTR
|
| -
|
| - inc ctrCtx
|
| - mov CTR, ctrCtx
|
| - bswap CTR
|
| - xor CTR, [ctx + 3*4]
|
| - mov [esp + 2*16 + 3*4], CTR
|
| -
|
| - inc ctrCtx
|
| - mov CTR, ctrCtx
|
| - bswap CTR
|
| - xor CTR, [ctx + 3*4]
|
| - mov [esp + 3*16 + 3*4], CTR
|
| -
|
| - inc ctrCtx
|
| - mov CTR, ctrCtx
|
| - bswap CTR
|
| - xor CTR, [ctx + 3*4]
|
| - mov [esp + 4*16 + 3*4], CTR
|
| -
|
| - inc ctrCtx
|
| - mov CTR, ctrCtx
|
| - bswap CTR
|
| - xor CTR, [ctx + 3*4]
|
| - mov [esp + 5*16 + 3*4], CTR
|
| -
|
| - inc ctrCtx
|
| - mov CTR, ctrCtx
|
| - bswap CTR
|
| - xor CTR, [ctx + 3*4]
|
| - mov [esp + 6*16 + 3*4], CTR
|
| -
|
| -
|
| -loop7:
|
| - cmp inputLen, 7*16
|
| - jb loop1
|
| -
|
| - movdqu xmm0, [0*16 + esp]
|
| - movdqu xmm1, [1*16 + esp]
|
| - movdqu xmm2, [2*16 + esp]
|
| - movdqu xmm3, [3*16 + esp]
|
| - movdqu xmm4, [4*16 + esp]
|
| - movdqu xmm5, [5*16 + esp]
|
| - movdqu xmm6, [6*16 + esp]
|
| -
|
| - i = 1
|
| - WHILE i LE 7
|
| - aes_rnd i
|
| -
|
| - inc ctrCtx
|
| - mov CTR, ctrCtx
|
| - bswap CTR
|
| - xor CTR, [ctx + 3*4]
|
| - mov [esp + (i-1)*16 + 3*4], CTR
|
| -
|
| - i = i+1
|
| - ENDM
|
| - WHILE i LT rnds
|
| - aes_rnd i
|
| - i = i+1
|
| - ENDM
|
| - aes_last_rnd rnds
|
| -
|
| - movdqu xmm7, [0*16 + input]
|
| - pxor xmm0, xmm7
|
| - movdqu xmm7, [1*16 + input]
|
| - pxor xmm1, xmm7
|
| - movdqu xmm7, [2*16 + input]
|
| - pxor xmm2, xmm7
|
| - movdqu xmm7, [3*16 + input]
|
| - pxor xmm3, xmm7
|
| - movdqu xmm7, [4*16 + input]
|
| - pxor xmm4, xmm7
|
| - movdqu xmm7, [5*16 + input]
|
| - pxor xmm5, xmm7
|
| - movdqu xmm7, [6*16 + input]
|
| - pxor xmm6, xmm7
|
| -
|
| - movdqu [0*16 + output], xmm0
|
| - movdqu [1*16 + output], xmm1
|
| - movdqu [2*16 + output], xmm2
|
| - movdqu [3*16 + output], xmm3
|
| - movdqu [4*16 + output], xmm4
|
| - movdqu [5*16 + output], xmm5
|
| - movdqu [6*16 + output], xmm6
|
| -
|
| - lea input, [7*16 + input]
|
| - lea output, [7*16 + output]
|
| - sub inputLen, 7*16
|
| - jmp loop7
|
| -
|
| -
|
| -loop1:
|
| - cmp inputLen, 1*16
|
| - jb bail
|
| -
|
| - movdqu xmm0, [esp]
|
| - add esp, 16
|
| -
|
| - i = 1
|
| - WHILE i LT rnds
|
| - movdqu xmm7, [i*16 + ctx]
|
| - aesenc xmm0, xmm7
|
| - i = i+1
|
| - ENDM
|
| - movdqu xmm7, [rnds*16 + ctx]
|
| - aesenclast xmm0, xmm7
|
| -
|
| - movdqu xmm7, [input]
|
| - pxor xmm0, xmm7
|
| - movdqu [output], xmm0
|
| -
|
| - lea input, [1*16 + input]
|
| - lea output, [1*16 + output]
|
| - sub inputLen, 1*16
|
| - jmp loop1
|
| -
|
| -bail:
|
| -
|
| - mov ctrCtx, [ebp + 4*5 + 0*4]
|
| - movdqu xmm0, [esp]
|
| - movdqu xmm1, [ctx + 0*16]
|
| - pxor xmm0, xmm1
|
| - movdqu [8+ctrCtx], xmm0
|
| -
|
| -
|
| - xor eax, eax
|
| - mov esp, ebp
|
| - pop ebp
|
| - pop CTR
|
| - pop ctrCtx
|
| - pop inputLen
|
| - ret
|
| -ENDM
|
| -
|
| -
|
| -ALIGN 16
|
| -intel_aes_encrypt_ctr_128 PROC
|
| -gen_aes_ctr_func 10
|
| -intel_aes_encrypt_ctr_128 ENDP
|
| -
|
| -ALIGN 16
|
| -intel_aes_encrypt_ctr_192 PROC
|
| -gen_aes_ctr_func 12
|
| -intel_aes_encrypt_ctr_192 ENDP
|
| -
|
| -ALIGN 16
|
| -intel_aes_encrypt_ctr_256 PROC
|
| -gen_aes_ctr_func 14
|
| -intel_aes_encrypt_ctr_256 ENDP
|
| -
|
| -
|
| -END
|
|
|