Index: nss/lib/freebl/intel-gcm-x86-masm.asm |
diff --git a/nss/lib/freebl/intel-gcm-x86-masm.asm b/nss/lib/freebl/intel-gcm-x86-masm.asm |
deleted file mode 100644 |
index 6362ad85953dd8df6be9580e9ebeaa09c82ce51b..0000000000000000000000000000000000000000 |
--- a/nss/lib/freebl/intel-gcm-x86-masm.asm |
+++ /dev/null |
@@ -1,1209 +0,0 @@ |
-; LICENSE: |
-; This submission to NSS is to be made available under the terms of the |
-; Mozilla Public License, v. 2.0. You can obtain one at http: |
-; //mozilla.org/MPL/2.0/. |
-;############################################################################### |
-; Copyright(c) 2014, Intel Corp. |
-; Developers and authors: |
-; Shay Gueron and Vlad Krasnov |
-; Intel Corporation, Israel Development Centre, Haifa, Israel |
-; Please send feedback directly to crypto.feedback.alias@intel.com |
- |
- |
-; 32-bit flat memory model with C naming/calling convention; .XMM enables XMM register operands. |
-.MODEL FLAT, C |
-.XMM |
- |
-.DATA |
-ALIGN 16 |
-; Lone / Ltwo: 128-bit constants holding 1 (resp. 2) in the low quadword. intel_aes_gcmENC adds |
-; them with vpaddd to the byte-reversed counter block to derive consecutive counter values. |
-Lone dq 1,0 |
-Ltwo dq 2,0 |
-; vpshufb control mask that reverses the order of the 16 bytes of a block. |
-Lbswap_mask db 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 |
-; Not referenced by any instruction in the code visible in this file. |
-Lshuff_mask dq 0f0f0f0f0f0f0f0fh, 0f0f0f0f0f0f0f0fh |
-; Reduction constant: every reduction step multiplies by its high quadword |
-; (vpclmulqdq ..., [Lpoly], 010h); intel_aes_gcmINIT also XORs it into H conditionally. |
-Lpoly dq 01h, 0c200000000000000h |
- |
-.CODE |
- |
- |
-; GFMUL DST, SRC1, SRC2, TMP1, TMP2, TMP3, TMP4 |
-;   DST  = product of SRC1 and SRC2, reduced with Lpoly. |
-;   SRC1, SRC2 are only read; DST is written by the last instruction only, so DST may be the |
-;   same register as SRC1 (every call site in this file does that). |
-;   TMP1..TMP4 are clobbered and must be distinct from SRC1/SRC2 and from each other. |
-GFMUL MACRO DST, SRC1, SRC2, TMP1, TMP2, TMP3, TMP4 |
- ; TMP1 = low qword x low qword, TMP4 = high qword x high qword |
- vpclmulqdq TMP1, SRC2, SRC1, 0h |
- vpclmulqdq TMP4, SRC2, SRC1, 011h |
- |
- ; vpshufd with 78 (01001110b) swaps the two 64-bit halves; XOR with the source gives |
- ; (hi xor lo) in the low qword of TMP2 / TMP3 for the Karatsuba middle term |
- vpshufd TMP2, SRC2, 78 |
- vpshufd TMP3, SRC1, 78 |
- vpxor TMP2, TMP2, SRC2 |
- vpxor TMP3, TMP3, SRC1 |
- |
- ; middle term = (hi xor lo)*(hi xor lo) xor low product xor high product |
- vpclmulqdq TMP2, TMP2, TMP3, 0h |
- vpxor TMP2, TMP2, TMP1 |
- vpxor TMP2, TMP2, TMP4 |
- |
- ; split the middle term across the two 128-bit halves of the 256-bit product |
- vpslldq TMP3, TMP2, 8 |
- vpsrldq TMP2, TMP2, 8 |
- |
- ; TMP1 = low 128 bits, TMP4 = high 128 bits of the unreduced product |
- vpxor TMP1, TMP1, TMP3 |
- vpxor TMP4, TMP4, TMP2 |
- |
- ; first folding step: low qword of TMP1 times the high qword of Lpoly, XOR the half-swapped TMP1 |
- vpclmulqdq TMP2, TMP1, [Lpoly], 010h |
- vpshufd TMP3, TMP1, 78 |
- vpxor TMP1, TMP2, TMP3 |
- |
- ; second folding step, same operation again |
- vpclmulqdq TMP2, TMP1, [Lpoly], 010h |
- vpshufd TMP3, TMP1, 78 |
- vpxor TMP1, TMP2, TMP3 |
- |
- ; combine the folded low half with the high half |
- vpxor DST, TMP1, TMP4 |
- |
- ENDM |
- |
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
-; |
-; Generates the final GCM tag |
-; void intel_aes_gcmTAG(unsigned char Htbl[16*16], |
-; unsigned char *Tp, |
-; unsigned int Mlen, |
-; unsigned int Alen, |
-; unsigned char* X0, |
-; unsigned char* TAG); |
-; |
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
- |
-ALIGN 16 |
-; Computes the final tag: T is loaded from Tp, XORed with a block built from the two length |
-; arguments (each multiplied by 8), multiplied by the first Htbl entry, byte-reversed, |
-; XORed with the 16 bytes at X0 and stored to TAG. |
-; Uses eax, ecx, edx, xmm0-xmm5; ebx is saved and restored. |
-intel_aes_gcmTAG PROC |
- |
-Htbl textequ <eax> |
-Tp textequ <ecx> |
-X0 textequ <edx> |
-TAG textequ <ebx> |
- |
-T textequ <xmm0> |
-TMP0 textequ <xmm1> |
- |
- push ebx |
- |
- ; after the push, stack argument k lives at esp + 2*4 + k*4 |
- mov Htbl, [esp + 2*4 + 0*4] |
- mov Tp, [esp + 2*4 + 1*4] |
- mov X0, [esp + 2*4 + 4*4] |
- mov TAG, [esp + 2*4 + 5*4] |
- |
- vzeroupper |
- vmovdqu T, XMMWORD PTR[Tp] |
- |
- ; build the length block: third argument (Mlen) in dword 0, fourth (Alen) in dword 2, |
- ; then shift each qword left by 3, i.e. multiply both lengths by 8 |
- vpxor TMP0, TMP0, TMP0 |
- vpinsrd TMP0, TMP0, DWORD PTR[esp + 2*4 + 2*4], 0 |
- vpinsrd TMP0, TMP0, DWORD PTR[esp + 2*4 + 3*4], 2 |
- vpsllq TMP0, TMP0, 3 |
- |
- ; fold the length block into the hash and multiply by the first table entry |
- vpxor T, T, TMP0 |
- vmovdqu TMP0, XMMWORD PTR[Htbl] |
- GFMUL T, T, TMP0, xmm2, xmm3, xmm4, xmm5 |
- |
- ; reverse the byte order, mask with the block at X0 and write the tag out |
- vpshufb T, T, [Lbswap_mask] |
- vpxor T, T, [X0] |
- vmovdqu XMMWORD PTR[TAG], T |
- vzeroupper |
- |
- pop ebx |
- |
- ret |
- |
-intel_aes_gcmTAG ENDP |
- |
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
-; |
-; Generates the H table |
-; void intel_aes_gcmINIT(unsigned char Htbl[16*16], unsigned char *KS, int NR); |
-; |
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
- |
-ALIGN 16 |
-; Derives the hash key from the AES key schedule and fills the table at Htbl: |
-;   Htbl[0..7]   hold the doubled hash key and its successive products with itself |
-;   Htbl[8..15]  hold, for each of those entries, (entry with halves swapped) xor (entry) |
-; Uses eax, ecx, edx and xmm0-xmm5; no callee-saved register is touched. |
-intel_aes_gcmINIT PROC |
- |
-Htbl textequ <eax> |
-KS textequ <ecx> |
-NR textequ <edx> |
- |
-T textequ <xmm0> |
-TMP0 textequ <xmm1> |
- |
- ; nothing has been pushed: stack argument k lives at esp + 4 + k*4 |
- mov Htbl, [esp + 4*1 + 0*4] |
- mov KS, [esp + 4*1 + 1*4] |
- mov NR, [esp + 4*1 + 2*4] |
- |
- vzeroupper |
- ; AES-ENC(0) |
- ; round key 0 is the state after whitening an all-zero block |
- vmovdqu T, XMMWORD PTR[KS] |
- lea KS, [16 + KS] |
- dec NR |
- ; NR-1 middle rounds; KS advances by one round key per iteration |
-Lenc_loop: |
- vaesenc T, T, [KS] |
- lea KS, [16 + KS] |
- dec NR |
- jnz Lenc_loop |
- |
- vaesenclast T, T, [KS] |
- vpshufb T, T, [Lbswap_mask] |
- |
- ;Calculate H` = GFMUL(H, 2) |
- ; xmm5 = Lpoly if the top bit of T is set, else 0 (sign of dword 3 broadcast as a mask) |
- vpsrad xmm3, T, 31 |
- vpshufd xmm3, xmm3, 0ffh |
- vpand xmm5, xmm3, [Lpoly] |
- ; 128-bit shift left by one: shift each dword, carry each dword's top bit into the next dword |
- vpsrld xmm3, T, 31 |
- vpslld xmm4, T, 1 |
- vpslldq xmm3, xmm3, 4 |
- vpxor T, xmm4, xmm3 |
- vpxor T, T, xmm5 |
- |
- ; TMP0 keeps the doubled key as the fixed multiplier for the loop below |
- vmovdqu TMP0, T |
- vmovdqu XMMWORD PTR[Htbl + 0*16], T |
- |
- ; companion entry: (halves swapped) xor (value), used for the Karatsuba middle term |
- vpshufd xmm2, T, 78 |
- vpxor xmm2, xmm2, T |
- vmovdqu XMMWORD PTR[Htbl + 8*16 + 0*16], xmm2 |
- |
- ; assembly-time loop, unrolled 7 times: entry i = entry i-1 times TMP0, plus its companion |
- i = 1 |
- WHILE i LT 8 |
- GFMUL T, T, TMP0, xmm2, xmm3, xmm4, xmm5 |
- vmovdqu XMMWORD PTR[Htbl + i*16], T |
- vpshufd xmm2, T, 78 |
- vpxor xmm2, xmm2, T |
- vmovdqu XMMWORD PTR[Htbl + 8*16 + i*16], xmm2 |
- i = i+1 |
- ENDM |
- vzeroupper |
- ret |
-intel_aes_gcmINIT ENDP |
- |
- |
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
-; |
-; Authenticate only |
-; void intel_aes_gcmAAD(unsigned char Htbl[16*16], unsigned char *AAD, unsigned int Alen, unsigned char *Tp); |
-; |
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
- |
-ALIGN 16 |
-; Folds Alen bytes at AAD into the 16-byte hash stored at Tp, using the table at Htbl. |
-; The first (Alen mod 128)/16 blocks are hashed as a prefix, then 8 blocks per iteration. |
-; Between iterations the unreduced 256-bit product is carried as [Xhi:T] (Xhi = high half, |
-; T = low half); its reduction is interleaved with the next iteration's multiplications. |
-; NOTE(review): the prefix loop counts hlp0 down in steps of 16 until it is exactly 0, so |
-; Alen is presumably always a multiple of 16 -- confirm against the callers. |
-; Uses eax, ecx, edx, xmm0-xmm7; ebx and esi are saved and restored. |
-intel_aes_gcmAAD PROC |
- |
-Htbl textequ <eax> |
-inp textequ <ecx> |
-len textequ <edx> |
-Tp textequ <ebx> |
-hlp0 textequ <esi> |
- |
-DATA textequ <xmm0> |
-T textequ <xmm1> |
-TMP0 textequ <xmm2> |
-TMP1 textequ <xmm3> |
-TMP2 textequ <xmm4> |
-TMP3 textequ <xmm5> |
-TMP4 textequ <xmm6> |
-Xhi textequ <xmm7> |
- |
-; Accumulates the three Karatsuba partial products of DATA with table entry i: |
-; TMP0 ^= low*low, TMP1 ^= high*high, TMP2 ^= (hi xor lo) * companion entry. Clobbers TMP3. |
-KARATSUBA_AAD MACRO i |
- vpclmulqdq TMP3, DATA, [Htbl + i*16], 0h |
- vpxor TMP0, TMP0, TMP3 |
- vpclmulqdq TMP3, DATA, [Htbl + i*16], 011h |
- vpxor TMP1, TMP1, TMP3 |
- vpshufd TMP3, DATA, 78 |
- vpxor TMP3, TMP3, DATA |
- vpclmulqdq TMP3, TMP3, [Htbl + 8*16 + i*16], 0h |
- vpxor TMP2, TMP2, TMP3 |
-ENDM |
- |
- ; Nothing to do for an empty input. Nothing is pushed yet, so the third argument (Alen) |
- ; is at esp + 4 (return address) + 2*4. The previous offset 1*3 + 2*4 = 11 read an |
- ; unaligned dword straddling the AAD pointer and Alen. |
- cmp DWORD PTR[esp + 1*4 + 2*4], 0 |
- jnz LbeginAAD |
- ret |
- |
-LbeginAAD: |
- push ebx |
- push esi |
- |
- ; two pushes + return address: stack argument k lives at esp + 4*3 + k*4 |
- mov Htbl, [esp + 4*3 + 0*4] |
- mov inp, [esp + 4*3 + 1*4] |
- mov len, [esp + 4*3 + 2*4] |
- mov Tp, [esp + 4*3 + 3*4] |
- |
- vzeroupper |
- |
- vpxor Xhi, Xhi, Xhi |
- |
- vmovdqu T, XMMWORD PTR[Tp] |
- ;we hash 8 block each iteration, if the total amount of blocks is not a multiple of 8, we hash the first n%8 blocks first |
- mov hlp0, len |
- and hlp0, 128-1 |
- jnz LprefixAAD |
- |
- ; No prefix blocks. The main loop treats T as the LOW half of an unreduced product and |
- ; folds it twice before XORing in Xhi, so an already reduced hash must be handed over in |
- ; Xhi with T cleared; otherwise a non-zero incoming hash would be folded a second time. |
- vmovdqa Xhi, T |
- vpxor T, T, T |
- jmp Lmod_loop |
- |
-LprefixAAD: |
- and len, -128 |
- ; hlp0 = byte offset of the table entry for the first prefix block (counts down to 0) |
- sub hlp0, 16 |
- |
- ; Prefix block |
- ; only the first block absorbs the incoming hash value |
- vmovdqu DATA, XMMWORD PTR[inp] |
- vpshufb DATA, DATA, [Lbswap_mask] |
- vpxor DATA, DATA, T |
- |
- vpclmulqdq TMP0, DATA, XMMWORD PTR[Htbl + hlp0], 0h |
- vpclmulqdq TMP1, DATA, XMMWORD PTR[Htbl + hlp0], 011h |
- vpshufd TMP3, DATA, 78 |
- vpxor TMP3, TMP3, DATA |
- vpclmulqdq TMP2, TMP3, XMMWORD PTR[Htbl + 8*16 + hlp0], 0h |
- |
- lea inp, [inp+16] |
- test hlp0, hlp0 |
- jnz Lpre_loop |
- jmp Lred1 |
- |
- ;hash remaining prefix blocks (up to 7 total prefix blocks) |
-Lpre_loop: |
- |
- sub hlp0, 16 |
- |
- vmovdqu DATA, XMMWORD PTR[inp] |
- vpshufb DATA, DATA, [Lbswap_mask] |
- |
- vpclmulqdq TMP3, DATA, XMMWORD PTR[Htbl + hlp0], 0h |
- vpxor TMP0, TMP0, TMP3 |
- vpclmulqdq TMP3, DATA, XMMWORD PTR[Htbl + hlp0], 011h |
- vpxor TMP1, TMP1, TMP3 |
- vpshufd TMP3, DATA, 78 |
- vpxor TMP3, TMP3, DATA |
- vpclmulqdq TMP3, TMP3, XMMWORD PTR[Htbl + 8*16 + hlp0], 0h |
- vpxor TMP2, TMP2, TMP3 |
- |
- ; lea does not modify flags, so jnz still sees the result of the test |
- test hlp0, hlp0 |
- lea inp, [inp+16] |
- jnz Lpre_loop |
- |
-Lred1: |
- |
- ; Karatsuba fixup: recover the middle term and spread it over the two halves |
- vpxor TMP2, TMP2, TMP0 |
- vpxor TMP2, TMP2, TMP1 |
- vpsrldq TMP3, TMP2, 8 |
- vpslldq TMP2, TMP2, 8 |
- |
- vpxor Xhi, TMP1, TMP3 |
- vpxor T, TMP0, TMP2 |
- |
-Lmod_loop: |
- |
- ; leave once fewer than 8 whole blocks remain (borrow from the subtraction) |
- sub len, 16*8 |
- jb Ldone |
- ; Block #0 |
- ; the last block of the group pairs with table entry 0, the first block with entry 7 |
- vmovdqu DATA, XMMWORD PTR[inp + 16*7] |
- vpshufb DATA, DATA, XMMWORD PTR[Lbswap_mask] |
- |
- vpclmulqdq TMP0, DATA, XMMWORD PTR[Htbl + 0*16], 0h |
- vpclmulqdq TMP1, DATA, XMMWORD PTR[Htbl + 0*16], 011h |
- vpshufd TMP3, DATA, 78 |
- vpxor TMP3, TMP3, DATA |
- vpclmulqdq TMP2, TMP3, XMMWORD PTR[Htbl + 8*16 + 0*16], 0h |
- |
- ; Block #1 |
- vmovdqu DATA, XMMWORD PTR[inp + 16*6] |
- vpshufb DATA, DATA, [Lbswap_mask] |
- KARATSUBA_AAD 1 |
- |
- ; Block #2 |
- vmovdqu DATA, XMMWORD PTR[inp + 16*5] |
- vpshufb DATA, DATA, [Lbswap_mask] |
- |
- vpclmulqdq TMP4, T, [Lpoly], 010h ;reduction stage 1a |
- vpalignr T, T, T, 8 |
- |
- KARATSUBA_AAD 2 |
- |
- vpxor T, T, TMP4 ;reduction stage 1b |
- |
- ; Block #3 |
- vmovdqu DATA, XMMWORD PTR[inp + 16*4] |
- vpshufb DATA, DATA, [Lbswap_mask] |
- KARATSUBA_AAD 3 |
- ; Block #4 |
- vmovdqu DATA, XMMWORD PTR[inp + 16*3] |
- vpshufb DATA, DATA, [Lbswap_mask] |
- |
- vpclmulqdq TMP4, T, [Lpoly], 010h ;reduction stage 2a |
- vpalignr T, T, T, 8 |
- |
- KARATSUBA_AAD 4 |
- |
- vpxor T, T, TMP4 ;reduction stage 2b |
- ; Block #5 |
- vmovdqu DATA, XMMWORD PTR[inp + 16*2] |
- vpshufb DATA, DATA, [Lbswap_mask] |
- KARATSUBA_AAD 5 |
- |
- vpxor T, T, Xhi ;reduction finalize |
- ; Block #6 |
- vmovdqu DATA, XMMWORD PTR[inp + 16*1] |
- vpshufb DATA, DATA, [Lbswap_mask] |
- KARATSUBA_AAD 6 |
- ; Block #7 |
- ; T now holds the fully reduced hash of everything before this group |
- vmovdqu DATA, XMMWORD PTR[inp + 16*0] |
- vpshufb DATA, DATA, [Lbswap_mask] |
- vpxor DATA, DATA, T |
- KARATSUBA_AAD 7 |
- ; Aggregated 8 blocks, now karatsuba fixup |
- vpxor TMP2, TMP2, TMP0 |
- vpxor TMP2, TMP2, TMP1 |
- vpsrldq TMP3, TMP2, 8 |
- vpslldq TMP2, TMP2, 8 |
- |
- vpxor Xhi, TMP1, TMP3 |
- vpxor T, TMP0, TMP2 |
- |
- lea inp, [inp + 16*8] |
- jmp Lmod_loop |
- |
-Ldone: |
- ; final reduction of the pending [Xhi:T]: two folding steps, then XOR the high half |
- vpclmulqdq TMP4, T, [Lpoly], 010h |
- vpalignr T, T, T, 8 |
- vpxor T, T, TMP4 |
- |
- vpclmulqdq TMP4, T, [Lpoly], 010h |
- vpalignr T, T, T, 8 |
- vpxor T, T, TMP4 |
- |
- vpxor T, T, Xhi |
- vmovdqu XMMWORD PTR[Tp], T |
- vzeroupper |
- |
- pop esi |
- pop ebx |
- ret |
- |
-intel_aes_gcmAAD ENDP |
- |
- |
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
-; |
-; Encrypt and Authenticate |
-; void intel_aes_gcmENC(unsigned char* PT, unsigned char* CT, void *Gctx, unsigned int len); |
-; |
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
- |
-ALIGN 16 |
-; Encrypts len bytes from PT to CT in counter mode and folds the produced ciphertext into |
-; the running hash kept in the context. Returns immediately when len is 0. |
-; |
-; Symbols (text macros; they stay defined after this PROC and are reused further down): |
-;   PT (eax), CT (ecx)   input / output pointers, advanced as data is consumed |
-;   Gctx = Htbl (edx)    context pointer; the hash table is addressed from offset 0 of it |
-;   T                    running hash, in memory at Gctx + 16*16 + 1*16 |
-;   counter block        at Gctx + 16*16 + 2*16; its last dword is the big-endian counter |
-;   KS (esi)             (pointer stored at Gctx + 16*16 + 3*16) + 44; round key i is read |
-;                        at KS + i*16 |
-;   NR                   dword at KS - 40, compared against 10 and 12 to pick the round count |
-;   aluCTR (ebx)         counter in host byte order; aluTMP (edi) scratch |
-;   len                  the fourth stack argument, updated in place |
-;   TMP0..TMP5 share xmm1..xmm6 with CTR1..CTR6; xmm7 holds the current round key |
-; |
-; Stack frame (16*16 bytes, 16-byte aligned, released through ebp): |
-;   esp + 0              scratch block for the partial tail |
-;   esp + 1*16 .. 6*16   byte-reversed ciphertext of the previous 7-block batch waiting to |
-;                        be hashed (the seventh block of the batch stays in TMP5) |
-;   esp + 8*16 .. 14*16  seven counter blocks, already XORed with round key 0 |
-; |
-; Invariant outside the 7-block batches: aluCTR equals the counter held in the slot at |
-; esp + 8*16, i.e. the next counter value that has not been used yet. |
-; ebp, ebx, esi, edi are saved and restored. |
-intel_aes_gcmENC PROC |
- |
-PT textequ <eax> |
-CT textequ <ecx> |
-Htbl textequ <edx> |
-Gctx textequ <edx> |
-len textequ <DWORD PTR[ebp + 5*4 + 3*4]> |
-KS textequ <esi> |
-NR textequ <DWORD PTR[-40 + KS]> |
- |
-aluCTR textequ <ebx> |
-aluTMP textequ <edi> |
- |
-T textequ <XMMWORD PTR[16*16 + 1*16 + Gctx]> |
-TMP0 textequ <xmm1> |
-TMP1 textequ <xmm2> |
-TMP2 textequ <xmm3> |
-TMP3 textequ <xmm4> |
-TMP4 textequ <xmm5> |
-TMP5 textequ <xmm6> |
- |
-CTR0 textequ <xmm0> |
-CTR1 textequ <xmm1> |
-CTR2 textequ <xmm2> |
-CTR3 textequ <xmm3> |
-CTR4 textequ <xmm4> |
-CTR5 textequ <xmm5> |
-CTR6 textequ <xmm6> |
- |
-; One AES round with round key i applied to all seven counter blocks. Clobbers xmm7. |
-ROUND MACRO i |
- vmovdqu xmm7, XMMWORD PTR[i*16 + KS] |
- vaesenc CTR0, CTR0, xmm7 |
- vaesenc CTR1, CTR1, xmm7 |
- vaesenc CTR2, CTR2, xmm7 |
- vaesenc CTR3, CTR3, xmm7 |
- vaesenc CTR4, CTR4, xmm7 |
- vaesenc CTR5, CTR5, xmm7 |
- vaesenc CTR6, CTR6, xmm7 |
-ENDM |
- |
-; Accumulates the Karatsuba partial products of TMP5 with table entry i: |
-; TMP0 ^= middle term, TMP1 ^= high*high, TMP2 ^= low*low. Clobbers TMP3 and TMP4. |
-KARATSUBA MACRO i |
- vpshufd TMP4, TMP5, 78 |
- vpxor TMP4, TMP4, TMP5 |
- vpclmulqdq TMP3, TMP4, XMMWORD PTR[i*16 + 8*16 + Htbl], 000h |
- vpxor TMP0, TMP0, TMP3 |
- vmovdqu TMP4, XMMWORD PTR[i*16 + Htbl] |
- vpclmulqdq TMP3, TMP5, TMP4, 011h |
- vpxor TMP1, TMP1, TMP3 |
- vpclmulqdq TMP3, TMP5, TMP4, 000h |
- vpxor TMP2, TMP2, TMP3 |
-ENDM |
- |
-; Advances aluCTR by one and patches the last dword of counter slot i (at esp + 8*16 + i*16) |
-; with the big-endian counter XORed with the matching dword of round key 0. Clobbers aluTMP. |
-NEXTCTR MACRO i |
- add aluCTR, 1 |
- mov aluTMP, aluCTR |
- bswap aluTMP |
- xor aluTMP, [3*4 + KS] |
- mov [3*4 + 8*16 + i*16 + esp], aluTMP |
-ENDM |
- |
- ; nothing pushed yet: the fourth argument (len) is at esp + 4 + 3*4 |
- cmp DWORD PTR[1*4 + 3*4 + esp], 0 |
- jne LbeginENC |
- ret |
- |
-LbeginENC: |
- |
- vzeroupper |
- push ebp |
- push ebx |
- push esi |
- push edi |
- |
- ; ebp keeps the unaligned stack pointer; arguments are at ebp + 5*4 + k*4 |
- mov ebp, esp |
- sub esp, 16*16 |
- and esp, -16 |
- |
- mov PT, [ebp + 5*4 + 0*4] |
- mov CT, [ebp + 5*4 + 1*4] |
- mov Gctx, [ebp + 5*4 + 2*4] |
- |
- mov KS, [16*16 + 3*16 + Gctx] |
- lea KS, [44 + KS] |
- |
- mov aluCTR, [16*16 + 2*16 + 3*4 + Gctx] |
- bswap aluCTR |
- |
- |
- ; counter slot 0 = current counter block XOR round key 0 |
- vmovdqu TMP0, XMMWORD PTR[0*16 + KS] |
- vpxor TMP0, TMP0, XMMWORD PTR[16*16 + 2*16 + Gctx] |
- vmovdqu XMMWORD PTR[8*16 + 0*16 + esp], TMP0 |
- |
- cmp len, 16*7 |
- jb LEncDataSingles |
-; Prepare the "top" counters |
- ; slots 1..6 start as copies of slot 0; NEXTCTR later patches only their last dword |
- vmovdqu XMMWORD PTR[8*16 + 1*16 + esp], TMP0 |
- vmovdqu XMMWORD PTR[8*16 + 2*16 + esp], TMP0 |
- vmovdqu XMMWORD PTR[8*16 + 3*16 + esp], TMP0 |
- vmovdqu XMMWORD PTR[8*16 + 4*16 + esp], TMP0 |
- vmovdqu XMMWORD PTR[8*16 + 5*16 + esp], TMP0 |
- vmovdqu XMMWORD PTR[8*16 + 6*16 + esp], TMP0 |
- |
- vmovdqu CTR0, XMMWORD PTR[16*16 + 2*16 + Gctx] |
- vpshufb CTR0, CTR0, XMMWORD PTR[Lbswap_mask] |
-; Encrypt the initial 7 blocks |
- ; counters +1 .. +6 are derived with vector adds on the byte-reversed block |
- sub len, 16*7 |
- vpaddd CTR1, CTR0, XMMWORD PTR[Lone] |
- vpaddd CTR2, CTR0, XMMWORD PTR[Ltwo] |
- vpaddd CTR3, CTR2, XMMWORD PTR[Lone] |
- vpaddd CTR4, CTR2, XMMWORD PTR[Ltwo] |
- vpaddd CTR5, CTR4, XMMWORD PTR[Lone] |
- vpaddd CTR6, CTR4, XMMWORD PTR[Ltwo] |
- |
- vpshufb CTR0, CTR0, XMMWORD PTR[Lbswap_mask] |
- vpshufb CTR1, CTR1, XMMWORD PTR[Lbswap_mask] |
- vpshufb CTR2, CTR2, XMMWORD PTR[Lbswap_mask] |
- vpshufb CTR3, CTR3, XMMWORD PTR[Lbswap_mask] |
- vpshufb CTR4, CTR4, XMMWORD PTR[Lbswap_mask] |
- vpshufb CTR5, CTR5, XMMWORD PTR[Lbswap_mask] |
- vpshufb CTR6, CTR6, XMMWORD PTR[Lbswap_mask] |
- |
- vmovdqu xmm7, XMMWORD PTR[0*16 + KS] |
- vpxor CTR0, CTR0, xmm7 |
- vpxor CTR1, CTR1, xmm7 |
- vpxor CTR2, CTR2, xmm7 |
- vpxor CTR3, CTR3, xmm7 |
- vpxor CTR4, CTR4, xmm7 |
- vpxor CTR5, CTR5, xmm7 |
- vpxor CTR6, CTR6, xmm7 |
- |
- ROUND 1 |
- |
- ; while this batch is in flight, prepare the counters of the next batch: |
- ; slot 0 gets counter+7 here, slots 1..6 get counter+8 .. counter+13 below |
- add aluCTR, 7 |
- mov aluTMP, aluCTR |
- bswap aluTMP |
- xor aluTMP, [KS + 3*4] |
- mov [8*16 + 0*16 + 3*4 + esp], aluTMP |
- |
- ROUND 2 |
- NEXTCTR 1 |
- ROUND 3 |
- NEXTCTR 2 |
- ROUND 4 |
- NEXTCTR 3 |
- ROUND 5 |
- NEXTCTR 4 |
- ROUND 6 |
- NEXTCTR 5 |
- ROUND 7 |
- NEXTCTR 6 |
- ROUND 8 |
- ROUND 9 |
- ; xmm7 = last round key: key 10 when NR is 10, key 12 when NR is 12, otherwise key 14 |
- vmovdqu xmm7, XMMWORD PTR[10*16 + KS] |
- cmp NR, 10 |
- je @f |
- |
- ROUND 10 |
- ROUND 11 |
- vmovdqu xmm7, XMMWORD PTR[12*16 + KS] |
- cmp NR, 12 |
- je @f |
- |
- ROUND 12 |
- ROUND 13 |
- vmovdqu xmm7, XMMWORD PTR[14*16 + KS] |
-@@: |
- vaesenclast CTR0, CTR0, xmm7 |
- vaesenclast CTR1, CTR1, xmm7 |
- vaesenclast CTR2, CTR2, xmm7 |
- vaesenclast CTR3, CTR3, xmm7 |
- vaesenclast CTR4, CTR4, xmm7 |
- vaesenclast CTR5, CTR5, xmm7 |
- vaesenclast CTR6, CTR6, xmm7 |
- |
- vpxor CTR0, CTR0, XMMWORD PTR[0*16 + PT] |
- vpxor CTR1, CTR1, XMMWORD PTR[1*16 + PT] |
- vpxor CTR2, CTR2, XMMWORD PTR[2*16 + PT] |
- vpxor CTR3, CTR3, XMMWORD PTR[3*16 + PT] |
- vpxor CTR4, CTR4, XMMWORD PTR[4*16 + PT] |
- vpxor CTR5, CTR5, XMMWORD PTR[5*16 + PT] |
- vpxor CTR6, CTR6, XMMWORD PTR[6*16 + PT] |
- |
- vmovdqu XMMWORD PTR[0*16 + CT], CTR0 |
- vmovdqu XMMWORD PTR[1*16 + CT], CTR1 |
- vmovdqu XMMWORD PTR[2*16 + CT], CTR2 |
- vmovdqu XMMWORD PTR[3*16 + CT], CTR3 |
- vmovdqu XMMWORD PTR[4*16 + CT], CTR4 |
- vmovdqu XMMWORD PTR[5*16 + CT], CTR5 |
- vmovdqu XMMWORD PTR[6*16 + CT], CTR6 |
- |
- ; byte-reverse the ciphertext for hashing; the last block stays in TMP5, the other six |
- ; are parked on the stack in reverse order (block 0 ends up in slot 6) |
- vpshufb CTR0, CTR0, XMMWORD PTR[Lbswap_mask] |
- vpshufb CTR1, CTR1, XMMWORD PTR[Lbswap_mask] |
- vpshufb CTR2, CTR2, XMMWORD PTR[Lbswap_mask] |
- vpshufb CTR3, CTR3, XMMWORD PTR[Lbswap_mask] |
- vpshufb CTR4, CTR4, XMMWORD PTR[Lbswap_mask] |
- vpshufb CTR5, CTR5, XMMWORD PTR[Lbswap_mask] |
- vpshufb TMP5, CTR6, XMMWORD PTR[Lbswap_mask] |
- |
- vmovdqa XMMWORD PTR[1*16 + esp], CTR5 |
- vmovdqa XMMWORD PTR[2*16 + esp], CTR4 |
- vmovdqa XMMWORD PTR[3*16 + esp], CTR3 |
- vmovdqa XMMWORD PTR[4*16 + esp], CTR2 |
- vmovdqa XMMWORD PTR[5*16 + esp], CTR1 |
- vmovdqa XMMWORD PTR[6*16 + esp], CTR0 |
- |
- lea CT, [7*16 + CT] |
- lea PT, [7*16 + PT] |
- ; falls through into the batch loop |
- |
-LEncData7: |
- cmp len, 16*7 |
- jb LEndEnc7 |
- sub len, 16*7 |
- |
- ; hash the previous batch: newest block (TMP5) with table entry 0, oldest with entry 6 |
- vpshufd TMP4, TMP5, 78 |
- vpxor TMP4, TMP4, TMP5 |
- vpclmulqdq TMP0, TMP4, XMMWORD PTR[0*16 + 8*16 + Htbl], 000h |
- vmovdqu TMP4, XMMWORD PTR[0*16 + Htbl] |
- vpclmulqdq TMP1, TMP5, TMP4, 011h |
- vpclmulqdq TMP2, TMP5, TMP4, 000h |
- |
- vmovdqu TMP5, XMMWORD PTR[1*16 + esp] |
- KARATSUBA 1 |
- vmovdqu TMP5, XMMWORD PTR[2*16 + esp] |
- KARATSUBA 2 |
- vmovdqu TMP5, XMMWORD PTR[3*16 + esp] |
- KARATSUBA 3 |
- vmovdqu TMP5, XMMWORD PTR[4*16 + esp] |
- KARATSUBA 4 |
- vmovdqu TMP5, XMMWORD PTR[5*16 + esp] |
- KARATSUBA 5 |
- ; the oldest block of the batch absorbs the running hash |
- vmovdqu TMP5, XMMWORD PTR[6*16 + esp] |
- vpxor TMP5, TMP5, T |
- KARATSUBA 6 |
- |
- ; Karatsuba fixup: TMP4 = high half, TMP5 = low half of the unreduced product |
- vpxor TMP0, TMP0, TMP1 |
- vpxor TMP0, TMP0, TMP2 |
- vpsrldq TMP3, TMP0, 8 |
- vpxor TMP4, TMP1, TMP3 |
- vpslldq TMP3, TMP0, 8 |
- vpxor TMP5, TMP2, TMP3 |
- |
- ; two folding steps on the low half, then XOR the high half |
- vpclmulqdq TMP1, TMP5, XMMWORD PTR[Lpoly], 010h |
- vpalignr TMP5,TMP5,TMP5,8 |
- vpxor TMP5, TMP5, TMP1 |
- |
- vpclmulqdq TMP1, TMP5, XMMWORD PTR[Lpoly], 010h |
- vpalignr TMP5,TMP5,TMP5,8 |
- vpxor TMP5, TMP5, TMP1 |
- |
- vpxor TMP5, TMP5, TMP4 |
- vmovdqu T, TMP5 |
- |
- ; load the seven prepared counter blocks (round key 0 already applied) |
- vmovdqa CTR0, XMMWORD PTR[8*16 + 0*16 + esp] |
- vmovdqa CTR1, XMMWORD PTR[8*16 + 1*16 + esp] |
- vmovdqa CTR2, XMMWORD PTR[8*16 + 2*16 + esp] |
- vmovdqa CTR3, XMMWORD PTR[8*16 + 3*16 + esp] |
- vmovdqa CTR4, XMMWORD PTR[8*16 + 4*16 + esp] |
- vmovdqa CTR5, XMMWORD PTR[8*16 + 5*16 + esp] |
- vmovdqa CTR6, XMMWORD PTR[8*16 + 6*16 + esp] |
- |
- ; AES rounds interleaved with preparing the counters of the following batch |
- ROUND 1 |
- NEXTCTR 0 |
- ROUND 2 |
- NEXTCTR 1 |
- ROUND 3 |
- NEXTCTR 2 |
- ROUND 4 |
- NEXTCTR 3 |
- ROUND 5 |
- NEXTCTR 4 |
- ROUND 6 |
- NEXTCTR 5 |
- ROUND 7 |
- NEXTCTR 6 |
- |
- ROUND 8 |
- ROUND 9 |
- |
- vmovdqu xmm7, XMMWORD PTR[10*16 + KS] |
- cmp NR, 10 |
- je @f |
- |
- ROUND 10 |
- ROUND 11 |
- vmovdqu xmm7, XMMWORD PTR[12*16 + KS] |
- cmp NR, 12 |
- je @f |
- |
- ROUND 12 |
- ROUND 13 |
- vmovdqu xmm7, XMMWORD PTR[14*16 + KS] |
-@@: |
- vaesenclast CTR0, CTR0, xmm7 |
- vaesenclast CTR1, CTR1, xmm7 |
- vaesenclast CTR2, CTR2, xmm7 |
- vaesenclast CTR3, CTR3, xmm7 |
- vaesenclast CTR4, CTR4, xmm7 |
- vaesenclast CTR5, CTR5, xmm7 |
- vaesenclast CTR6, CTR6, xmm7 |
- |
- vpxor CTR0, CTR0, XMMWORD PTR[0*16 + PT] |
- vpxor CTR1, CTR1, XMMWORD PTR[1*16 + PT] |
- vpxor CTR2, CTR2, XMMWORD PTR[2*16 + PT] |
- vpxor CTR3, CTR3, XMMWORD PTR[3*16 + PT] |
- vpxor CTR4, CTR4, XMMWORD PTR[4*16 + PT] |
- vpxor CTR5, CTR5, XMMWORD PTR[5*16 + PT] |
- vpxor CTR6, CTR6, XMMWORD PTR[6*16 + PT] |
- |
- vmovdqu XMMWORD PTR[0*16 + CT], CTR0 |
- vmovdqu XMMWORD PTR[1*16 + CT], CTR1 |
- vmovdqu XMMWORD PTR[2*16 + CT], CTR2 |
- vmovdqu XMMWORD PTR[3*16 + CT], CTR3 |
- vmovdqu XMMWORD PTR[4*16 + CT], CTR4 |
- vmovdqu XMMWORD PTR[5*16 + CT], CTR5 |
- vmovdqu XMMWORD PTR[6*16 + CT], CTR6 |
- |
- vpshufb CTR0, CTR0, XMMWORD PTR[Lbswap_mask] |
- vpshufb CTR1, CTR1, XMMWORD PTR[Lbswap_mask] |
- vpshufb CTR2, CTR2, XMMWORD PTR[Lbswap_mask] |
- vpshufb CTR3, CTR3, XMMWORD PTR[Lbswap_mask] |
- vpshufb CTR4, CTR4, XMMWORD PTR[Lbswap_mask] |
- vpshufb CTR5, CTR5, XMMWORD PTR[Lbswap_mask] |
- vpshufb TMP5, CTR6, XMMWORD PTR[Lbswap_mask] |
- |
- vmovdqa XMMWORD PTR[1*16 + esp], CTR5 |
- vmovdqa XMMWORD PTR[2*16 + esp], CTR4 |
- vmovdqa XMMWORD PTR[3*16 + esp], CTR3 |
- vmovdqa XMMWORD PTR[4*16 + esp], CTR2 |
- vmovdqa XMMWORD PTR[5*16 + esp], CTR1 |
- vmovdqa XMMWORD PTR[6*16 + esp], CTR0 |
- |
- lea CT, [7*16 + CT] |
- lea PT, [7*16 + PT] |
- jmp LEncData7 |
- |
-LEndEnc7: |
- |
- ; hash the last complete batch (same sequence as at the top of the loop) |
- vpshufd TMP4, TMP5, 78 |
- vpxor TMP4, TMP4, TMP5 |
- vpclmulqdq TMP0, TMP4, XMMWORD PTR[0*16 + 8*16 + Htbl], 000h |
- vmovdqu TMP4, XMMWORD PTR[0*16 + Htbl] |
- vpclmulqdq TMP1, TMP5, TMP4, 011h |
- vpclmulqdq TMP2, TMP5, TMP4, 000h |
- |
- vmovdqu TMP5, XMMWORD PTR[1*16 + esp] |
- KARATSUBA 1 |
- vmovdqu TMP5, XMMWORD PTR[2*16 + esp] |
- KARATSUBA 2 |
- vmovdqu TMP5, XMMWORD PTR[3*16 + esp] |
- KARATSUBA 3 |
- vmovdqu TMP5, XMMWORD PTR[4*16 + esp] |
- KARATSUBA 4 |
- vmovdqu TMP5, XMMWORD PTR[5*16 + esp] |
- KARATSUBA 5 |
- vmovdqu TMP5, XMMWORD PTR[6*16 + esp] |
- vpxor TMP5, TMP5, T |
- KARATSUBA 6 |
- |
- vpxor TMP0, TMP0, TMP1 |
- vpxor TMP0, TMP0, TMP2 |
- vpsrldq TMP3, TMP0, 8 |
- vpxor TMP4, TMP1, TMP3 |
- vpslldq TMP3, TMP0, 8 |
- vpxor TMP5, TMP2, TMP3 |
- |
- vpclmulqdq TMP1, TMP5, XMMWORD PTR[Lpoly], 010h |
- vpalignr TMP5,TMP5,TMP5,8 |
- vpxor TMP5, TMP5, TMP1 |
- |
- vpclmulqdq TMP1, TMP5, XMMWORD PTR[Lpoly], 010h |
- vpalignr TMP5,TMP5,TMP5,8 |
- vpxor TMP5, TMP5, TMP1 |
- |
- vpxor TMP5, TMP5, TMP4 |
- vmovdqu T, TMP5 |
- |
- ; counters for one more batch were prepared but not used: aluCTR is 6 ahead of the |
- ; value in slot 0, so bring it back to the next unused counter |
- sub aluCTR, 6 |
- |
-LEncDataSingles: |
- |
- cmp len, 16 |
- jb LEncDataTail |
- sub len, 16 |
- |
- ; take the prepared counter block, then prepare the following one in the same slot |
- vmovdqa TMP1, XMMWORD PTR[8*16 + 0*16 + esp] |
- NEXTCTR 0 |
- |
- vaesenc TMP1, TMP1, XMMWORD PTR[1*16 + KS] |
- vaesenc TMP1, TMP1, XMMWORD PTR[2*16 + KS] |
- vaesenc TMP1, TMP1, XMMWORD PTR[3*16 + KS] |
- vaesenc TMP1, TMP1, XMMWORD PTR[4*16 + KS] |
- vaesenc TMP1, TMP1, XMMWORD PTR[5*16 + KS] |
- vaesenc TMP1, TMP1, XMMWORD PTR[6*16 + KS] |
- vaesenc TMP1, TMP1, XMMWORD PTR[7*16 + KS] |
- vaesenc TMP1, TMP1, XMMWORD PTR[8*16 + KS] |
- vaesenc TMP1, TMP1, XMMWORD PTR[9*16 + KS] |
- vmovdqu TMP2, XMMWORD PTR[10*16 + KS] |
- cmp NR, 10 |
- je @f |
- vaesenc TMP1, TMP1, XMMWORD PTR[10*16 + KS] |
- vaesenc TMP1, TMP1, XMMWORD PTR[11*16 + KS] |
- vmovdqu TMP2, XMMWORD PTR[12*16 + KS] |
- cmp NR, 12 |
- je @f |
- vaesenc TMP1, TMP1, XMMWORD PTR[12*16 + KS] |
- vaesenc TMP1, TMP1, XMMWORD PTR[13*16 + KS] |
- vmovdqu TMP2, XMMWORD PTR[14*16 + KS] |
-@@: |
- vaesenclast TMP1, TMP1, TMP2 |
- vpxor TMP1, TMP1, XMMWORD PTR[PT] |
- vmovdqu XMMWORD PTR[CT], TMP1 |
- |
- lea PT, [16+PT] |
- lea CT, [16+CT] |
- |
- ; fold the ciphertext block into the hash: T = (block xor T) * first table entry |
- vpshufb TMP1, TMP1, XMMWORD PTR[Lbswap_mask] |
- vpxor TMP1, TMP1, T |
- |
- vmovdqu TMP0, XMMWORD PTR[Htbl] |
- GFMUL TMP1, TMP1, TMP0, TMP5, TMP2, TMP3, TMP4 |
- vmovdqu T, TMP1 |
- |
- jmp LEncDataSingles |
- |
-LEncDataTail: |
- |
- cmp len, 0 |
- je LEncDataEnd |
- |
- vmovdqa TMP1, XMMWORD PTR[8*16 + 0*16 + esp] |
- ; The partial block consumes one counter value. The increment lives here, as in |
- ; intel_aes_gcmDEC, so that the counter written back is not advanced past an unused |
- ; value when the input ends on a block boundary. |
- inc aluCTR |
- |
- vaesenc TMP1, TMP1, XMMWORD PTR[1*16 + KS] |
- vaesenc TMP1, TMP1, XMMWORD PTR[2*16 + KS] |
- vaesenc TMP1, TMP1, XMMWORD PTR[3*16 + KS] |
- vaesenc TMP1, TMP1, XMMWORD PTR[4*16 + KS] |
- vaesenc TMP1, TMP1, XMMWORD PTR[5*16 + KS] |
- vaesenc TMP1, TMP1, XMMWORD PTR[6*16 + KS] |
- vaesenc TMP1, TMP1, XMMWORD PTR[7*16 + KS] |
- vaesenc TMP1, TMP1, XMMWORD PTR[8*16 + KS] |
- vaesenc TMP1, TMP1, XMMWORD PTR[9*16 + KS] |
- vmovdqu TMP2, XMMWORD PTR[10*16 + KS] |
- cmp NR, 10 |
- je @f |
- vaesenc TMP1, TMP1, XMMWORD PTR[10*16 + KS] |
- vaesenc TMP1, TMP1, XMMWORD PTR[11*16 + KS] |
- vmovdqu TMP2, XMMWORD PTR[12*16 + KS] |
- cmp NR, 12 |
- je @f |
- vaesenc TMP1, TMP1, XMMWORD PTR[12*16 + KS] |
- vaesenc TMP1, TMP1, XMMWORD PTR[13*16 + KS] |
- vmovdqu TMP2, XMMWORD PTR[14*16 + KS] |
-@@: |
- vaesenclast TMP1, TMP1, TMP2 |
-; zero a temp location |
- vpxor TMP2, TMP2, TMP2 |
- vmovdqa XMMWORD PTR[esp], TMP2 |
-; copy as many bytes as needed |
- ; the key schedule is no longer needed: KS (esi) becomes the byte index, and edx (Gctx) |
- ; is parked in aluTMP because dl carries the bytes |
- xor KS, KS |
- mov aluTMP, edx |
-@@: |
- cmp len, KS |
- je @f |
- mov dl, BYTE PTR[PT + KS] |
- mov BYTE PTR[esp + KS], dl |
- inc KS |
- jmp @b |
-@@: |
- ; encrypt the zero-padded block in place, then copy only len bytes to the output |
- vpxor TMP1, TMP1, XMMWORD PTR[esp] |
- vmovdqa XMMWORD PTR[esp], TMP1 |
- xor KS, KS |
-@@: |
- cmp len, KS |
- je @f |
- mov dl, BYTE PTR[esp + KS] |
- mov BYTE PTR[CT + KS], dl |
- inc KS |
- jmp @b |
-@@: |
- ; clear the bytes past len so that the hashed block is the zero-padded ciphertext |
- cmp KS, 16 |
- je @f |
- mov BYTE PTR[esp + KS], 0 |
- inc KS |
- jmp @b |
-@@: |
- mov edx, aluTMP |
- vmovdqa TMP1, XMMWORD PTR[esp] |
- vpshufb TMP1, TMP1, XMMWORD PTR[Lbswap_mask] |
- vpxor TMP1, TMP1, T |
- |
- vmovdqu TMP0, XMMWORD PTR[Htbl] |
- GFMUL TMP1, TMP1, TMP0, TMP5, TMP2, TMP3, TMP4 |
- vmovdqu T, TMP1 |
- |
-LEncDataEnd: |
- ; write the next unused counter back into the context in big-endian order |
- bswap aluCTR |
- mov [16*16 + 2*16 + 3*4 + Gctx], aluCTR |
- |
- mov esp, ebp |
- pop edi |
- pop esi |
- pop ebx |
- pop ebp |
- |
- |
- vzeroupper |
- |
- ret |
-intel_aes_gcmENC ENDP |
- |
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
-; |
-; Decrypt and Authenticate |
-; void intel_aes_gcmDEC(uint8_t* CT, uint8_t* PT, void *Gctx, unsigned int len); |
-; |
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
- |
- |
-; Redefinition of NEXTCTR for the code that follows: advances aluCTR by one and patches the |
-; last dword of the counter slot at esp + i*16 (no 8*16 bias here) with the big-endian |
-; counter XORed with the matching dword of round key 0. Clobbers aluTMP. |
-NEXTCTR MACRO i |
- add aluCTR, 1 |
- mov aluTMP, aluCTR |
- bswap aluTMP |
- xor aluTMP, [3*4 + KS] |
- mov [3*4 + i*16 + esp], aluTMP |
-ENDM |
- |
-; Hashes len bytes of ciphertext into the running hash kept in the context and decrypts them |
-; in counter mode. Returns immediately when len is 0. |
-; The first stack argument is loaded into CT and only read; the second is loaded into PT |
-; and only written. |
-; Relies on the text macros (PT, CT, Gctx, Htbl, len, KS, NR, aluCTR, aluTMP, T, TMPn, CTRn) |
-; and on the ROUND / KARATSUBA macros defined earlier in this file. |
-; Stack frame: 8*16 bytes, 16-byte aligned; esp + 0 .. 6*16 hold seven counter blocks already |
-; XORed with round key 0, and esp + 0 doubles as scratch for the partial tail. |
-; ebp, ebx, esi, edi are saved and restored. |
-intel_aes_gcmDEC PROC |
- |
- ; nothing pushed yet: the fourth argument (len) is at esp + 4 + 3*4 |
- cmp DWORD PTR[1*4 + 3*4 + esp], 0 |
- jne LbeginDEC |
- ret |
- |
-LbeginDEC: |
- |
- vzeroupper |
- push ebp |
- push ebx |
- push esi |
- push edi |
- |
- ; ebp keeps the unaligned stack pointer; arguments are at ebp + 5*4 + k*4 |
- mov ebp, esp |
- sub esp, 8*16 |
- and esp, -16 |
- |
- mov CT, [ebp + 5*4 + 0*4] |
- mov PT, [ebp + 5*4 + 1*4] |
- mov Gctx, [ebp + 5*4 + 2*4] |
- |
- mov KS, [16*16 + 3*16 + Gctx] |
- lea KS, [44 + KS] |
- |
- mov aluCTR, [16*16 + 2*16 + 3*4 + Gctx] |
- bswap aluCTR |
- |
- |
- ; counter slot 0 = current counter block XOR round key 0 |
- vmovdqu TMP0, XMMWORD PTR[0*16 + KS] |
- vpxor TMP0, TMP0, XMMWORD PTR[16*16 + 2*16 + Gctx] |
- vmovdqu XMMWORD PTR[0*16 + esp], TMP0 |
- |
- cmp len, 16*7 |
- jb LDecDataSingles |
- ; slots 1..6 start as copies of slot 0; NEXTCTR patches only their last dword |
- vmovdqu XMMWORD PTR[1*16 + esp], TMP0 |
- vmovdqu XMMWORD PTR[2*16 + esp], TMP0 |
- vmovdqu XMMWORD PTR[3*16 + esp], TMP0 |
- vmovdqu XMMWORD PTR[4*16 + esp], TMP0 |
- vmovdqu XMMWORD PTR[5*16 + esp], TMP0 |
- vmovdqu XMMWORD PTR[6*16 + esp], TMP0 |
- ; step back by one so that the first NEXTCTR 0 below re-creates the current counter |
- dec aluCTR |
- |
-LDecData7: |
- cmp len, 16*7 |
- jb LDecData7End |
- sub len, 16*7 |
- |
- ; hash the 7 ciphertext blocks straight from the input: block 0 absorbs the running |
- ; hash and pairs with table entry 6, block 6 pairs with entry 0; the counter slots for |
- ; this batch are filled in between |
- vmovdqu TMP5, XMMWORD PTR[0*16 + CT] |
- vpshufb TMP5, TMP5, XMMWORD PTR[Lbswap_mask] |
- vpxor TMP5, TMP5, T |
- vpshufd TMP4, TMP5, 78 |
- vpxor TMP4, TMP4, TMP5 |
- vpclmulqdq TMP0, TMP4, XMMWORD PTR[6*16 + 8*16 + Htbl], 000h |
- vmovdqu TMP4, XMMWORD PTR[6*16 + Htbl] |
- vpclmulqdq TMP1, TMP5, TMP4, 011h |
- vpclmulqdq TMP2, TMP5, TMP4, 000h |
- |
- NEXTCTR 0 |
- vmovdqu TMP5, XMMWORD PTR[1*16 + CT] |
- vpshufb TMP5, TMP5, XMMWORD PTR[Lbswap_mask] |
- KARATSUBA 5 |
- NEXTCTR 1 |
- vmovdqu TMP5, XMMWORD PTR[2*16 + CT] |
- vpshufb TMP5, TMP5, XMMWORD PTR[Lbswap_mask] |
- KARATSUBA 4 |
- NEXTCTR 2 |
- vmovdqu TMP5, XMMWORD PTR[3*16 + CT] |
- vpshufb TMP5, TMP5, XMMWORD PTR[Lbswap_mask] |
- KARATSUBA 3 |
- NEXTCTR 3 |
- vmovdqu TMP5, XMMWORD PTR[4*16 + CT] |
- vpshufb TMP5, TMP5, XMMWORD PTR[Lbswap_mask] |
- KARATSUBA 2 |
- NEXTCTR 4 |
- vmovdqu TMP5, XMMWORD PTR[5*16 + CT] |
- vpshufb TMP5, TMP5, XMMWORD PTR[Lbswap_mask] |
- KARATSUBA 1 |
- NEXTCTR 5 |
- vmovdqu TMP5, XMMWORD PTR[6*16 + CT] |
- vpshufb TMP5, TMP5, XMMWORD PTR[Lbswap_mask] |
- KARATSUBA 0 |
- NEXTCTR 6 |
- |
- ; Karatsuba fixup: TMP4 = high half, TMP5 = low half of the unreduced product |
- vpxor TMP0, TMP0, TMP1 |
- vpxor TMP0, TMP0, TMP2 |
- vpsrldq TMP3, TMP0, 8 |
- vpxor TMP4, TMP1, TMP3 |
- vpslldq TMP3, TMP0, 8 |
- vpxor TMP5, TMP2, TMP3 |
- |
- ; two folding steps on the low half, then XOR the high half |
- vpclmulqdq TMP1, TMP5, XMMWORD PTR[Lpoly], 010h |
- vpalignr TMP5,TMP5,TMP5,8 |
- vpxor TMP5, TMP5, TMP1 |
- |
- vpclmulqdq TMP1, TMP5, XMMWORD PTR[Lpoly], 010h |
- vpalignr TMP5,TMP5,TMP5,8 |
- vpxor TMP5, TMP5, TMP1 |
- |
- vpxor TMP5, TMP5, TMP4 |
- vmovdqu T, TMP5 |
- |
- ; load the seven counter blocks prepared above (round key 0 already applied) |
- vmovdqa CTR0, XMMWORD PTR[0*16 + esp] |
- vmovdqa CTR1, XMMWORD PTR[1*16 + esp] |
- vmovdqa CTR2, XMMWORD PTR[2*16 + esp] |
- vmovdqa CTR3, XMMWORD PTR[3*16 + esp] |
- vmovdqa CTR4, XMMWORD PTR[4*16 + esp] |
- vmovdqa CTR5, XMMWORD PTR[5*16 + esp] |
- vmovdqa CTR6, XMMWORD PTR[6*16 + esp] |
- |
- ROUND 1 |
- ROUND 2 |
- ROUND 3 |
- ROUND 4 |
- ROUND 5 |
- ROUND 6 |
- ROUND 7 |
- ROUND 8 |
- ROUND 9 |
- ; xmm7 = last round key: key 10 when NR is 10, key 12 when NR is 12, otherwise key 14 |
- vmovdqu xmm7, XMMWORD PTR[10*16 + KS] |
- cmp NR, 10 |
- je @f |
- |
- ROUND 10 |
- ROUND 11 |
- vmovdqu xmm7, XMMWORD PTR[12*16 + KS] |
- cmp NR, 12 |
- je @f |
- |
- ROUND 12 |
- ROUND 13 |
- vmovdqu xmm7, XMMWORD PTR[14*16 + KS] |
-@@: |
- vaesenclast CTR0, CTR0, xmm7 |
- vaesenclast CTR1, CTR1, xmm7 |
- vaesenclast CTR2, CTR2, xmm7 |
- vaesenclast CTR3, CTR3, xmm7 |
- vaesenclast CTR4, CTR4, xmm7 |
- vaesenclast CTR5, CTR5, xmm7 |
- vaesenclast CTR6, CTR6, xmm7 |
- |
- vpxor CTR0, CTR0, XMMWORD PTR[0*16 + CT] |
- vpxor CTR1, CTR1, XMMWORD PTR[1*16 + CT] |
- vpxor CTR2, CTR2, XMMWORD PTR[2*16 + CT] |
- vpxor CTR3, CTR3, XMMWORD PTR[3*16 + CT] |
- vpxor CTR4, CTR4, XMMWORD PTR[4*16 + CT] |
- vpxor CTR5, CTR5, XMMWORD PTR[5*16 + CT] |
- vpxor CTR6, CTR6, XMMWORD PTR[6*16 + CT] |
- |
- vmovdqu XMMWORD PTR[0*16 + PT], CTR0 |
- vmovdqu XMMWORD PTR[1*16 + PT], CTR1 |
- vmovdqu XMMWORD PTR[2*16 + PT], CTR2 |
- vmovdqu XMMWORD PTR[3*16 + PT], CTR3 |
- vmovdqu XMMWORD PTR[4*16 + PT], CTR4 |
- vmovdqu XMMWORD PTR[5*16 + PT], CTR5 |
- vmovdqu XMMWORD PTR[6*16 + PT], CTR6 |
- |
- lea CT, [7*16 + CT] |
- lea PT, [7*16 + PT] |
- jmp LDecData7 |
- |
-LDecData7End: |
- |
- ; put the next unused counter into slot 0 for the single-block path |
- NEXTCTR 0 |
- |
-LDecDataSingles: |
- |
- cmp len, 16 |
- jb LDecDataTail |
- sub len, 16 |
- |
- ; hash the ciphertext block first: T = (block xor T) * first table entry |
- vmovdqu TMP1, XMMWORD PTR[CT] |
- vpshufb TMP1, TMP1, XMMWORD PTR[Lbswap_mask] |
- vpxor TMP1, TMP1, T |
- |
- vmovdqu TMP0, XMMWORD PTR[Htbl] |
- GFMUL TMP1, TMP1, TMP0, TMP5, TMP2, TMP3, TMP4 |
- vmovdqu T, TMP1 |
- |
- ; take the prepared counter block, then prepare the following one in the same slot |
- vmovdqa TMP1, XMMWORD PTR[0*16 + esp] |
- NEXTCTR 0 |
- |
- vaesenc TMP1, TMP1, XMMWORD PTR[1*16 + KS] |
- vaesenc TMP1, TMP1, XMMWORD PTR[2*16 + KS] |
- vaesenc TMP1, TMP1, XMMWORD PTR[3*16 + KS] |
- vaesenc TMP1, TMP1, XMMWORD PTR[4*16 + KS] |
- vaesenc TMP1, TMP1, XMMWORD PTR[5*16 + KS] |
- vaesenc TMP1, TMP1, XMMWORD PTR[6*16 + KS] |
- vaesenc TMP1, TMP1, XMMWORD PTR[7*16 + KS] |
- vaesenc TMP1, TMP1, XMMWORD PTR[8*16 + KS] |
- vaesenc TMP1, TMP1, XMMWORD PTR[9*16 + KS] |
- vmovdqu TMP2, XMMWORD PTR[10*16 + KS] |
- cmp NR, 10 |
- je @f |
- vaesenc TMP1, TMP1, XMMWORD PTR[10*16 + KS] |
- vaesenc TMP1, TMP1, XMMWORD PTR[11*16 + KS] |
- vmovdqu TMP2, XMMWORD PTR[12*16 + KS] |
- cmp NR, 12 |
- je @f |
- vaesenc TMP1, TMP1, XMMWORD PTR[12*16 + KS] |
- vaesenc TMP1, TMP1, XMMWORD PTR[13*16 + KS] |
- vmovdqu TMP2, XMMWORD PTR[14*16 + KS] |
-@@: |
- vaesenclast TMP1, TMP1, TMP2 |
- vpxor TMP1, TMP1, XMMWORD PTR[CT] |
- vmovdqu XMMWORD PTR[PT], TMP1 |
- |
- lea PT, [16+PT] |
- lea CT, [16+CT] |
- jmp LDecDataSingles |
- |
-LDecDataTail: |
- |
- cmp len, 0 |
- je LDecDataEnd |
- |
- ; the partial block consumes one counter value, hence the increment on this path only |
- vmovdqa TMP1, XMMWORD PTR[0*16 + esp] |
- inc aluCTR |
- vaesenc TMP1, TMP1, XMMWORD PTR[1*16 + KS] |
- vaesenc TMP1, TMP1, XMMWORD PTR[2*16 + KS] |
- vaesenc TMP1, TMP1, XMMWORD PTR[3*16 + KS] |
- vaesenc TMP1, TMP1, XMMWORD PTR[4*16 + KS] |
- vaesenc TMP1, TMP1, XMMWORD PTR[5*16 + KS] |
- vaesenc TMP1, TMP1, XMMWORD PTR[6*16 + KS] |
- vaesenc TMP1, TMP1, XMMWORD PTR[7*16 + KS] |
- vaesenc TMP1, TMP1, XMMWORD PTR[8*16 + KS] |
- vaesenc TMP1, TMP1, XMMWORD PTR[9*16 + KS] |
- vmovdqu TMP2, XMMWORD PTR[10*16 + KS] |
- cmp NR, 10 |
- je @f |
- vaesenc TMP1, TMP1, XMMWORD PTR[10*16 + KS] |
- vaesenc TMP1, TMP1, XMMWORD PTR[11*16 + KS] |
- vmovdqu TMP2, XMMWORD PTR[12*16 + KS] |
- cmp NR, 12 |
- je @f |
- vaesenc TMP1, TMP1, XMMWORD PTR[12*16 + KS] |
- vaesenc TMP1, TMP1, XMMWORD PTR[13*16 + KS] |
- vmovdqu TMP2, XMMWORD PTR[14*16 + KS] |
-@@: |
- ; keep the key stream block in xmm7; GFMUL below does not touch xmm7 |
- vaesenclast xmm7, TMP1, TMP2 |
- |
-; copy as many bytes as needed |
- ; the key schedule is no longer needed: KS (esi) becomes the byte index, and edx (Gctx) |
- ; is parked in aluTMP because dl carries the bytes |
- xor KS, KS |
- mov aluTMP, edx |
-@@: |
- cmp len, KS |
- je @f |
- mov dl, BYTE PTR[CT + KS] |
- mov BYTE PTR[esp + KS], dl |
- inc KS |
- jmp @b |
-@@: |
- ; zero-fill the rest of the 16-byte scratch block |
- cmp KS, 16 |
- je @f |
- mov BYTE PTR[esp + KS], 0 |
- inc KS |
- jmp @b |
-@@: |
- mov edx, aluTMP |
- ; hash the zero-padded ciphertext block |
- vmovdqa TMP1, XMMWORD PTR[esp] |
- vpshufb TMP1, TMP1, XMMWORD PTR[Lbswap_mask] |
- vpxor TMP1, TMP1, T |
- |
- vmovdqu TMP0, XMMWORD PTR[Htbl] |
- GFMUL TMP1, TMP1, TMP0, TMP5, TMP2, TMP3, TMP4 |
- vmovdqu T, TMP1 |
- |
- ; decrypt the scratch block in place and copy only len bytes to the output |
- vpxor xmm7, xmm7, XMMWORD PTR[esp] |
- vmovdqa XMMWORD PTR[esp], xmm7 |
- xor KS, KS |
- mov aluTMP, edx |
-@@: |
- cmp len, KS |
- je @f |
- mov dl, BYTE PTR[esp + KS] |
- mov BYTE PTR[PT + KS], dl |
- inc KS |
- jmp @b |
-@@: |
- mov edx, aluTMP |
- |
-LDecDataEnd: |
- |
- ; write the next unused counter back into the context in big-endian order |
- bswap aluCTR |
- mov [16*16 + 2*16 + 3*4 + Gctx], aluCTR |
- |
- mov esp, ebp |
- pop edi |
- pop esi |
- pop ebx |
- pop ebp |
- |
- vzeroupper |
- |
- ret |
-intel_aes_gcmDEC ENDP |
- |
- |
-END |