| Index: third_party/boringssl/win-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.asm
|
| diff --git a/third_party/boringssl/win-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.asm b/third_party/boringssl/win-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.asm
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..1c7360d085ba57a2d98d49efeafb1b4d03a2cc00
|
| --- /dev/null
|
| +++ b/third_party/boringssl/win-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.asm
|
| @@ -0,0 +1,3270 @@
|
| +default rel
|
| +%define XMMWORD
|
| +%define YMMWORD
|
| +%define ZMMWORD
|
| +section .data data align=8
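| +; Constants for the GCM-SIV routines below (meanings inferred from their
| +; uses later in this file): one..eight are little-endian 128-bit counter
| +; increments for the CTR blocks, OR_MASK forces the top bit of the counter
| +; block as RFC 8452 requires, poly packs the POLYVAL reduction constant
| +; (0xc2...01), and mask/con1/con2/con3 drive the vaesenclast-based AES
| +; key-schedule trick used in place of an rcon table.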
|
| +
|
| +
|
| +ALIGN 16
|
| +one:
|
| + DQ 1,0
|
| +two:
|
| + DQ 2,0
|
| +three:
|
| + DQ 3,0
|
| +four:
|
| + DQ 4,0
|
| +five:
|
| + DQ 5,0
|
| +six:
|
| + DQ 6,0
|
| +seven:
|
| + DQ 7,0
|
| +eight:
|
| + DQ 8,0
|
| +
|
| +OR_MASK:
|
| + DD 0x00000000,0x00000000,0x00000000,0x80000000
|
| +poly:
|
| + DQ 0x1,0xc200000000000000
|
| +mask:
|
| + DD 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
|
| +con1:
|
| + DD 1,1,1,1
|
| +con2:
|
| + DD 0x1b,0x1b,0x1b,0x1b
|
| +con3:
|
| +DB -1,-1,-1,-1,-1,-1,-1,-1,4,5,6,7,4,5,6,7
|
| +and_mask:
|
| + DD 0,0xffffffff,0xffffffff,0xffffffff
|
| +section .text code align=64
|
| +
|
| +
|
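| +; GFMUL (local helper, not exported): xmm0 <- xmm0 * xmm1 in POLYVAL's
| +; GF(2^128). The four vpclmulqdq instructions build the 256-bit schoolbook
| +; product; the two multiplications by [poly] then reduce it back to 128
| +; bits Montgomery-style. Clobbers xmm2-xmm5.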
| +ALIGN 16
|
| +GFMUL:
|
| +
|
| + vpclmulqdq xmm2,xmm0,xmm1,0x00
|
| + vpclmulqdq xmm5,xmm0,xmm1,0x11
|
| + vpclmulqdq xmm3,xmm0,xmm1,0x10
|
| + vpclmulqdq xmm4,xmm0,xmm1,0x01
|
| + vpxor xmm3,xmm3,xmm4
|
| + vpslldq xmm4,xmm3,8
|
| + vpsrldq xmm3,xmm3,8
|
| + vpxor xmm2,xmm2,xmm4
|
| + vpxor xmm5,xmm5,xmm3
|
| +
|
| + vpclmulqdq xmm3,xmm2,XMMWORD[poly],0x10
|
| + vpshufd xmm4,xmm2,78
|
| + vpxor xmm2,xmm3,xmm4
|
| +
|
| + vpclmulqdq xmm3,xmm2,XMMWORD[poly],0x10
|
| + vpshufd xmm4,xmm2,78
|
| + vpxor xmm2,xmm3,xmm4
|
| +
|
| + vpxor xmm0,xmm2,xmm5
|
| + DB 0F3h,0C3h ;repret
|
| +
|
| +
|
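| +; aesgcmsiv_htable_init: precompute the first eight powers of the POLYVAL
| +; key H. rdi (Win64: rcx) -> 128-byte output table receiving H^1..H^8;
| +; rsi (Win64: rdx) -> H. Each GFMUL call multiplies the running power by
| +; H, which stays live in xmm1.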
| +global aesgcmsiv_htable_init
|
| +
|
| +ALIGN 16
|
| +aesgcmsiv_htable_init:
|
| + mov QWORD[8+rsp],rdi ;WIN64 prologue
|
| + mov QWORD[16+rsp],rsi
|
| + mov rax,rsp
|
| +$L$SEH_begin_aesgcmsiv_htable_init:
|
| + mov rdi,rcx
|
| + mov rsi,rdx
|
| +
|
| +
|
| +
|
| + vmovdqa xmm0,XMMWORD[rsi]
|
| + vmovdqa xmm1,xmm0
|
| + vmovdqa XMMWORD[rdi],xmm0
|
| + call GFMUL
|
| + vmovdqa XMMWORD[16+rdi],xmm0
|
| + call GFMUL
|
| + vmovdqa XMMWORD[32+rdi],xmm0
|
| + call GFMUL
|
| + vmovdqa XMMWORD[48+rdi],xmm0
|
| + call GFMUL
|
| + vmovdqa XMMWORD[64+rdi],xmm0
|
| + call GFMUL
|
| + vmovdqa XMMWORD[80+rdi],xmm0
|
| + call GFMUL
|
| + vmovdqa XMMWORD[96+rdi],xmm0
|
| + call GFMUL
|
| + vmovdqa XMMWORD[112+rdi],xmm0
|
| + mov rdi,QWORD[8+rsp] ;WIN64 epilogue
|
| + mov rsi,QWORD[16+rsp]
|
| + DB 0F3h,0C3h ;repret
|
| +
|
| +$L$SEH_end_aesgcmsiv_htable_init:
|
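| +; aesgcmsiv_htable6_init: as aesgcmsiv_htable_init, but fills a 96-byte
| +; table with H^1..H^6 only; the six-wide decrypt loop later in this file
| +; appears to consume a table of this shape.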
| +global aesgcmsiv_htable6_init
|
| +
|
| +ALIGN 16
|
| +aesgcmsiv_htable6_init:
|
| + mov QWORD[8+rsp],rdi ;WIN64 prologue
|
| + mov QWORD[16+rsp],rsi
|
| + mov rax,rsp
|
| +$L$SEH_begin_aesgcmsiv_htable6_init:
|
| + mov rdi,rcx
|
| + mov rsi,rdx
|
| +
|
| +
|
| +
|
| + vmovdqa xmm0,XMMWORD[rsi]
|
| + vmovdqa xmm1,xmm0
|
| + vmovdqa XMMWORD[rdi],xmm0
|
| + call GFMUL
|
| + vmovdqa XMMWORD[16+rdi],xmm0
|
| + call GFMUL
|
| + vmovdqa XMMWORD[32+rdi],xmm0
|
| + call GFMUL
|
| + vmovdqa XMMWORD[48+rdi],xmm0
|
| + call GFMUL
|
| + vmovdqa XMMWORD[64+rdi],xmm0
|
| + call GFMUL
|
| + vmovdqa XMMWORD[80+rdi],xmm0
|
| + mov rdi,QWORD[8+rsp] ;WIN64 epilogue
|
| + mov rsi,QWORD[16+rsp]
|
| + DB 0F3h,0C3h ;repret
|
| +
|
| +$L$SEH_end_aesgcmsiv_htable6_init:
|
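| +; aesgcmsiv_htable_polyval: fold input into a running POLYVAL accumulator
| +; using the precomputed H-power table. After the Win64 register shuffle:
| +; rdi = htable, rsi = input, rdx = length in bytes (assumed a multiple of
| +; 16), rcx = 16-byte accumulator (read and updated in place). A prefix of
| +; len mod 128 bytes is folded first; the main loop then processes 128
| +; bytes per iteration with the reduction deferred across the block.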
| +global aesgcmsiv_htable_polyval
|
| +
|
| +ALIGN 16
|
| +aesgcmsiv_htable_polyval:
|
| + mov QWORD[8+rsp],rdi ;WIN64 prologue
|
| + mov QWORD[16+rsp],rsi
|
| + mov rax,rsp
|
| +$L$SEH_begin_aesgcmsiv_htable_polyval:
|
| + mov rdi,rcx
|
| + mov rsi,rdx
|
| + mov rdx,r8
|
| + mov rcx,r9
|
| +
|
| +
|
| +
|
| + test rdx,rdx
|
| + jnz NEAR $L$htable_polyval_start
|
| + mov rdi,QWORD[8+rsp] ;WIN64 epilogue
|
| + mov rsi,QWORD[16+rsp]
|
| + DB 0F3h,0C3h ;repret
|
| +
|
| +$L$htable_polyval_start:
|
| + vzeroall
|
| +
|
| +
|
| +
|
| + mov r11,rdx
|
| + and r11,127
|
| +
|
| + jz NEAR $L$htable_polyval_no_prefix
|
| +
|
| + vpxor xmm9,xmm9,xmm9
|
| + vmovdqa xmm1,XMMWORD[rcx]
|
| + sub rdx,r11
|
| +
|
| + sub r11,16
|
| +
|
| +
|
| + vmovdqu xmm0,XMMWORD[rsi]
|
| + vpxor xmm0,xmm0,xmm1
|
| +
|
| + vpclmulqdq xmm5,xmm0,XMMWORD[r11*1+rdi],0x01
|
| + vpclmulqdq xmm3,xmm0,XMMWORD[r11*1+rdi],0x00
|
| + vpclmulqdq xmm4,xmm0,XMMWORD[r11*1+rdi],0x11
|
| + vpclmulqdq xmm6,xmm0,XMMWORD[r11*1+rdi],0x10
|
| + vpxor xmm5,xmm5,xmm6
|
| +
|
| + lea rsi,[16+rsi]
|
| + test r11,r11
|
| + jnz NEAR $L$htable_polyval_prefix_loop
|
| + jmp NEAR $L$htable_polyval_prefix_complete
|
| +
|
| +
|
| +ALIGN 64
|
| +$L$htable_polyval_prefix_loop:
|
| + sub r11,16
|
| +
|
| + vmovdqu xmm0,XMMWORD[rsi]
|
| +
|
| + vpclmulqdq xmm6,xmm0,XMMWORD[r11*1+rdi],0x00
|
| + vpxor xmm3,xmm3,xmm6
|
| + vpclmulqdq xmm6,xmm0,XMMWORD[r11*1+rdi],0x11
|
| + vpxor xmm4,xmm4,xmm6
|
| + vpclmulqdq xmm6,xmm0,XMMWORD[r11*1+rdi],0x01
|
| + vpxor xmm5,xmm5,xmm6
|
| + vpclmulqdq xmm6,xmm0,XMMWORD[r11*1+rdi],0x10
|
| + vpxor xmm5,xmm5,xmm6
|
| +
|
| + test r11,r11
|
| +
|
| + lea rsi,[16+rsi]
|
| +
|
| + jnz NEAR $L$htable_polyval_prefix_loop
|
| +
|
| +$L$htable_polyval_prefix_complete:
|
| + vpsrldq xmm6,xmm5,8
|
| + vpslldq xmm5,xmm5,8
|
| +
|
| + vpxor xmm9,xmm4,xmm6
|
| + vpxor xmm1,xmm3,xmm5
|
| +
|
| + jmp NEAR $L$htable_polyval_main_loop
|
| +
|
| +$L$htable_polyval_no_prefix:
|
| +
|
| +
|
| +
|
| +
|
| + vpxor xmm1,xmm1,xmm1
|
| + vmovdqa xmm9,XMMWORD[rcx]
|
| +
|
| +ALIGN 64
|
| +$L$htable_polyval_main_loop:
|
| + sub rdx,0x80
|
| + jb NEAR $L$htable_polyval_out
|
| +
|
| + vmovdqu xmm0,XMMWORD[112+rsi]
|
| +
|
| + vpclmulqdq xmm5,xmm0,XMMWORD[rdi],0x01
|
| + vpclmulqdq xmm3,xmm0,XMMWORD[rdi],0x00
|
| + vpclmulqdq xmm4,xmm0,XMMWORD[rdi],0x11
|
| + vpclmulqdq xmm6,xmm0,XMMWORD[rdi],0x10
|
| + vpxor xmm5,xmm5,xmm6
|
| +
|
| +
|
| + vmovdqu xmm0,XMMWORD[96+rsi]
|
| + vpclmulqdq xmm6,xmm0,XMMWORD[16+rdi],0x01
|
| + vpxor xmm5,xmm5,xmm6
|
| + vpclmulqdq xmm6,xmm0,XMMWORD[16+rdi],0x00
|
| + vpxor xmm3,xmm3,xmm6
|
| + vpclmulqdq xmm6,xmm0,XMMWORD[16+rdi],0x11
|
| + vpxor xmm4,xmm4,xmm6
|
| + vpclmulqdq xmm6,xmm0,XMMWORD[16+rdi],0x10
|
| + vpxor xmm5,xmm5,xmm6
|
| +
|
| +
|
| +
|
| + vmovdqu xmm0,XMMWORD[80+rsi]
|
| +
|
| + vpclmulqdq xmm7,xmm1,XMMWORD[poly],0x10
|
| + vpalignr xmm1,xmm1,xmm1,8
|
| +
|
| + vpclmulqdq xmm6,xmm0,XMMWORD[32+rdi],0x01
|
| + vpxor xmm5,xmm5,xmm6
|
| + vpclmulqdq xmm6,xmm0,XMMWORD[32+rdi],0x00
|
| + vpxor xmm3,xmm3,xmm6
|
| + vpclmulqdq xmm6,xmm0,XMMWORD[32+rdi],0x11
|
| + vpxor xmm4,xmm4,xmm6
|
| + vpclmulqdq xmm6,xmm0,XMMWORD[32+rdi],0x10
|
| + vpxor xmm5,xmm5,xmm6
|
| +
|
| +
|
| + vpxor xmm1,xmm1,xmm7
|
| +
|
| + vmovdqu xmm0,XMMWORD[64+rsi]
|
| +
|
| + vpclmulqdq xmm6,xmm0,XMMWORD[48+rdi],0x01
|
| + vpxor xmm5,xmm5,xmm6
|
| + vpclmulqdq xmm6,xmm0,XMMWORD[48+rdi],0x00
|
| + vpxor xmm3,xmm3,xmm6
|
| + vpclmulqdq xmm6,xmm0,XMMWORD[48+rdi],0x11
|
| + vpxor xmm4,xmm4,xmm6
|
| + vpclmulqdq xmm6,xmm0,XMMWORD[48+rdi],0x10
|
| + vpxor xmm5,xmm5,xmm6
|
| +
|
| +
|
| + vmovdqu xmm0,XMMWORD[48+rsi]
|
| +
|
| + vpclmulqdq xmm7,xmm1,XMMWORD[poly],0x10
|
| + vpalignr xmm1,xmm1,xmm1,8
|
| +
|
| + vpclmulqdq xmm6,xmm0,XMMWORD[64+rdi],0x01
|
| + vpxor xmm5,xmm5,xmm6
|
| + vpclmulqdq xmm6,xmm0,XMMWORD[64+rdi],0x00
|
| + vpxor xmm3,xmm3,xmm6
|
| + vpclmulqdq xmm6,xmm0,XMMWORD[64+rdi],0x11
|
| + vpxor xmm4,xmm4,xmm6
|
| + vpclmulqdq xmm6,xmm0,XMMWORD[64+rdi],0x10
|
| + vpxor xmm5,xmm5,xmm6
|
| +
|
| +
|
| + vpxor xmm1,xmm1,xmm7
|
| +
|
| + vmovdqu xmm0,XMMWORD[32+rsi]
|
| +
|
| + vpclmulqdq xmm6,xmm0,XMMWORD[80+rdi],0x01
|
| + vpxor xmm5,xmm5,xmm6
|
| + vpclmulqdq xmm6,xmm0,XMMWORD[80+rdi],0x00
|
| + vpxor xmm3,xmm3,xmm6
|
| + vpclmulqdq xmm6,xmm0,XMMWORD[80+rdi],0x11
|
| + vpxor xmm4,xmm4,xmm6
|
| + vpclmulqdq xmm6,xmm0,XMMWORD[80+rdi],0x10
|
| + vpxor xmm5,xmm5,xmm6
|
| +
|
| +
|
| + vpxor xmm1,xmm1,xmm9
|
| +
|
| + vmovdqu xmm0,XMMWORD[16+rsi]
|
| +
|
| + vpclmulqdq xmm6,xmm0,XMMWORD[96+rdi],0x01
|
| + vpxor xmm5,xmm5,xmm6
|
| + vpclmulqdq xmm6,xmm0,XMMWORD[96+rdi],0x00
|
| + vpxor xmm3,xmm3,xmm6
|
| + vpclmulqdq xmm6,xmm0,XMMWORD[96+rdi],0x11
|
| + vpxor xmm4,xmm4,xmm6
|
| + vpclmulqdq xmm6,xmm0,XMMWORD[96+rdi],0x10
|
| + vpxor xmm5,xmm5,xmm6
|
| +
|
| +
|
| + vmovdqu xmm0,XMMWORD[rsi]
|
| + vpxor xmm0,xmm0,xmm1
|
| +
|
| + vpclmulqdq xmm6,xmm0,XMMWORD[112+rdi],0x01
|
| + vpxor xmm5,xmm5,xmm6
|
| + vpclmulqdq xmm6,xmm0,XMMWORD[112+rdi],0x00
|
| + vpxor xmm3,xmm3,xmm6
|
| + vpclmulqdq xmm6,xmm0,XMMWORD[112+rdi],0x11
|
| + vpxor xmm4,xmm4,xmm6
|
| + vpclmulqdq xmm6,xmm0,XMMWORD[112+rdi],0x10
|
| + vpxor xmm5,xmm5,xmm6
|
| +
|
| +
|
| + vpsrldq xmm6,xmm5,8
|
| + vpslldq xmm5,xmm5,8
|
| +
|
| + vpxor xmm9,xmm4,xmm6
|
| + vpxor xmm1,xmm3,xmm5
|
| +
|
| + lea rsi,[128+rsi]
|
| + jmp NEAR $L$htable_polyval_main_loop
|
| +
|
| +
|
| +
|
| +$L$htable_polyval_out:
|
| + vpclmulqdq xmm6,xmm1,XMMWORD[poly],0x10
|
| + vpalignr xmm1,xmm1,xmm1,8
|
| + vpxor xmm1,xmm1,xmm6
|
| +
|
| + vpclmulqdq xmm6,xmm1,XMMWORD[poly],0x10
|
| + vpalignr xmm1,xmm1,xmm1,8
|
| + vpxor xmm1,xmm1,xmm6
|
| + vpxor xmm1,xmm1,xmm9
|
| +
|
| + vmovdqu XMMWORD[rcx],xmm1
|
| + vzeroupper
|
| + mov rdi,QWORD[8+rsp] ;WIN64 epilogue
|
| + mov rsi,QWORD[16+rsp]
|
| + DB 0F3h,0C3h ;repret
|
| +
|
| +$L$SEH_end_aesgcmsiv_htable_polyval:
|
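| +; aesgcmsiv_polyval_horner: Horner-style POLYVAL update, one block at a
| +; time: acc = (acc xor block_i) * H. rdi = 16-byte accumulator (in/out),
| +; rsi = H, rdx = input, rcx = number of 16-byte blocks.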
| +global aesgcmsiv_polyval_horner
|
| +
|
| +ALIGN 16
|
| +aesgcmsiv_polyval_horner:
|
| + mov QWORD[8+rsp],rdi ;WIN64 prologue
|
| + mov QWORD[16+rsp],rsi
|
| + mov rax,rsp
|
| +$L$SEH_begin_aesgcmsiv_polyval_horner:
|
| + mov rdi,rcx
|
| + mov rsi,rdx
|
| + mov rdx,r8
|
| + mov rcx,r9
|
| +
|
| +
|
| +
|
| + test rcx,rcx
|
| + jnz NEAR $L$polyval_horner_start
|
| + mov rdi,QWORD[8+rsp] ;WIN64 epilogue
|
| + mov rsi,QWORD[16+rsp]
|
| + DB 0F3h,0C3h ;repret
|
| +
|
| +$L$polyval_horner_start:
|
| +
|
| +
|
| +
|
| + xor r10,r10
|
| + shl rcx,4
|
| +
|
| + vmovdqa xmm1,XMMWORD[rsi]
|
| + vmovdqa xmm0,XMMWORD[rdi]
|
| +
|
| +$L$polyval_horner_loop:
|
| + vpxor xmm0,xmm0,XMMWORD[r10*1+rdx]
|
| + call GFMUL
|
| +
|
| + add r10,16
|
| + cmp rcx,r10
|
| + jne NEAR $L$polyval_horner_loop
|
| +
|
| +
|
| + vmovdqa XMMWORD[rdi],xmm0
|
| + mov rdi,QWORD[8+rsp] ;WIN64 epilogue
|
| + mov rsi,QWORD[16+rsp]
|
| + DB 0F3h,0C3h ;repret
|
| +
|
| +$L$SEH_end_aesgcmsiv_polyval_horner:
|
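| +; aes128gcmsiv_aes_ks: AES-128 key expansion. rdi = 16-byte key, rsi =
| +; output buffer for the 11 round keys (176 bytes). Round constants are
| +; generated on the fly in xmm0 (con1, doubled each round, then con2).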
| +global aes128gcmsiv_aes_ks
|
| +
|
| +ALIGN 16
|
| +aes128gcmsiv_aes_ks:
|
| + mov QWORD[8+rsp],rdi ;WIN64 prologue
|
| + mov QWORD[16+rsp],rsi
|
| + mov rax,rsp
|
| +$L$SEH_begin_aes128gcmsiv_aes_ks:
|
| + mov rdi,rcx
|
| + mov rsi,rdx
|
| +
|
| +
|
| +
|
| + vmovdqa xmm1,XMMWORD[rdi]
|
| + vmovdqa XMMWORD[rsi],xmm1
|
| +
|
| + vmovdqa xmm0,XMMWORD[con1]
|
| + vmovdqa xmm15,XMMWORD[mask]
|
| +
|
| + mov rax,8
|
| +
|
| +$L$ks128_loop:
|
| + add rsi,16
|
| + sub rax,1
|
| + vpshufb xmm2,xmm1,xmm15
|
| + vaesenclast xmm2,xmm2,xmm0
|
| + vpslld xmm0,xmm0,1
|
| + vpslldq xmm3,xmm1,4
|
| + vpxor xmm1,xmm1,xmm3
|
| + vpslldq xmm3,xmm3,4
|
| + vpxor xmm1,xmm1,xmm3
|
| + vpslldq xmm3,xmm3,4
|
| + vpxor xmm1,xmm1,xmm3
|
| + vpxor xmm1,xmm1,xmm2
|
| + vmovdqa XMMWORD[rsi],xmm1
|
| + jne NEAR $L$ks128_loop
|
| +
|
| + vmovdqa xmm0,XMMWORD[con2]
|
| + vpshufb xmm2,xmm1,xmm15
|
| + vaesenclast xmm2,xmm2,xmm0
|
| + vpslld xmm0,xmm0,1
|
| + vpslldq xmm3,xmm1,4
|
| + vpxor xmm1,xmm1,xmm3
|
| + vpslldq xmm3,xmm3,4
|
| + vpxor xmm1,xmm1,xmm3
|
| + vpslldq xmm3,xmm3,4
|
| + vpxor xmm1,xmm1,xmm3
|
| + vpxor xmm1,xmm1,xmm2
|
| + vmovdqa XMMWORD[16+rsi],xmm1
|
| +
|
| + vpshufb xmm2,xmm1,xmm15
|
| + vaesenclast xmm2,xmm2,xmm0
|
| + vpslldq xmm3,xmm1,4
|
| + vpxor xmm1,xmm1,xmm3
|
| + vpslldq xmm3,xmm3,4
|
| + vpxor xmm1,xmm1,xmm3
|
| + vpslldq xmm3,xmm3,4
|
| + vpxor xmm1,xmm1,xmm3
|
| + vpxor xmm1,xmm1,xmm2
|
| + vmovdqa XMMWORD[32+rsi],xmm1
|
| + mov rdi,QWORD[8+rsp] ;WIN64 epilogue
|
| + mov rsi,QWORD[16+rsp]
|
| + DB 0F3h,0C3h ;repret
|
| +
|
| +$L$SEH_end_aes128gcmsiv_aes_ks:
|
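| +; aes256gcmsiv_aes_ks: AES-256 key expansion. rdi = 32-byte key, rsi =
| +; output buffer for the 15 round keys (240 bytes); each loop iteration
| +; derives two round keys, one per 16-byte key half.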
| +global aes256gcmsiv_aes_ks
|
| +
|
| +ALIGN 16
|
| +aes256gcmsiv_aes_ks:
|
| + mov QWORD[8+rsp],rdi ;WIN64 prologue
|
| + mov QWORD[16+rsp],rsi
|
| + mov rax,rsp
|
| +$L$SEH_begin_aes256gcmsiv_aes_ks:
|
| + mov rdi,rcx
|
| + mov rsi,rdx
|
| +
|
| +
|
| +
|
| + vmovdqa xmm1,XMMWORD[rdi]
|
| + vmovdqa xmm3,XMMWORD[16+rdi]
|
| + vmovdqa XMMWORD[rsi],xmm1
|
| + vmovdqa XMMWORD[16+rsi],xmm3
|
| + vmovdqa xmm0,XMMWORD[con1]
|
| + vmovdqa xmm15,XMMWORD[mask]
|
| + vpxor xmm14,xmm14,xmm14
|
| + mov rax,6
|
| +
|
| +$L$ks256_loop:
|
| + add rsi,32
|
| + sub rax,1
|
| + vpshufb xmm2,xmm3,xmm15
|
| + vaesenclast xmm2,xmm2,xmm0
|
| + vpslld xmm0,xmm0,1
|
| + vpsllq xmm4,xmm1,32
|
| + vpxor xmm1,xmm1,xmm4
|
| + vpshufb xmm4,xmm1,XMMWORD[con3]
|
| + vpxor xmm1,xmm1,xmm4
|
| + vpxor xmm1,xmm1,xmm2
|
| + vmovdqa XMMWORD[rsi],xmm1
|
| + vpshufd xmm2,xmm1,0xff
|
| + vaesenclast xmm2,xmm2,xmm14
|
| + vpsllq xmm4,xmm3,32
|
| + vpxor xmm3,xmm3,xmm4
|
| + vpshufb xmm4,xmm3,XMMWORD[con3]
|
| + vpxor xmm3,xmm3,xmm4
|
| + vpxor xmm3,xmm3,xmm2
|
| + vmovdqa XMMWORD[16+rsi],xmm3
|
| + jne NEAR $L$ks256_loop
|
| +
|
| + vpshufb xmm2,xmm3,xmm15
|
| + vaesenclast xmm2,xmm2,xmm0
|
| + vpsllq xmm4,xmm1,32
|
| + vpxor xmm1,xmm1,xmm4
|
| + vpshufb xmm4,xmm1,XMMWORD[con3]
|
| + vpxor xmm1,xmm1,xmm4
|
| + vpxor xmm1,xmm1,xmm2
|
| + vmovdqa XMMWORD[32+rsi],xmm1
|
| + mov rdi,QWORD[8+rsp] ;WIN64 epilogue
|
| + mov rsi,QWORD[16+rsp]
|
| + DB 0F3h,0C3h ;repret
|
| +
|
| +$L$SEH_end_aes256gcmsiv_aes_ks:
|
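| +; aes128gcmsiv_aes_ks_enc_x1: run the AES-128 key schedule and encrypt one
| +; block in the same pass, interleaving vaesenc with the schedule. rdi =
| +; input block, rsi = output block, rdx = expanded-key output, rcx = key.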
| +global aes128gcmsiv_aes_ks_enc_x1
|
| +
|
| +ALIGN 16
|
| +aes128gcmsiv_aes_ks_enc_x1:
|
| + mov QWORD[8+rsp],rdi ;WIN64 prologue
|
| + mov QWORD[16+rsp],rsi
|
| + mov rax,rsp
|
| +$L$SEH_begin_aes128gcmsiv_aes_ks_enc_x1:
|
| + mov rdi,rcx
|
| + mov rsi,rdx
|
| + mov rdx,r8
|
| + mov rcx,r9
|
| +
|
| +
|
| +
|
| + vmovdqa xmm1,XMMWORD[rcx]
|
| + vmovdqa xmm4,XMMWORD[rdi]
|
| +
|
| + vmovdqa XMMWORD[rdx],xmm1
|
| + vpxor xmm4,xmm4,xmm1
|
| +
|
| + vmovdqa xmm0,XMMWORD[con1]
|
| + vmovdqa xmm15,XMMWORD[mask]
|
| +
|
| + vpshufb xmm2,xmm1,xmm15
|
| + vaesenclast xmm2,xmm2,xmm0
|
| + vpslld xmm0,xmm0,1
|
| + vpsllq xmm3,xmm1,32
|
| + vpxor xmm1,xmm1,xmm3
|
| + vpshufb xmm3,xmm1,XMMWORD[con3]
|
| + vpxor xmm1,xmm1,xmm3
|
| + vpxor xmm1,xmm1,xmm2
|
| +
|
| + vaesenc xmm4,xmm4,xmm1
|
| + vmovdqa XMMWORD[16+rdx],xmm1
|
| +
|
| + vpshufb xmm2,xmm1,xmm15
|
| + vaesenclast xmm2,xmm2,xmm0
|
| + vpslld xmm0,xmm0,1
|
| + vpsllq xmm3,xmm1,32
|
| + vpxor xmm1,xmm1,xmm3
|
| + vpshufb xmm3,xmm1,XMMWORD[con3]
|
| + vpxor xmm1,xmm1,xmm3
|
| + vpxor xmm1,xmm1,xmm2
|
| +
|
| + vaesenc xmm4,xmm4,xmm1
|
| + vmovdqa XMMWORD[32+rdx],xmm1
|
| +
|
| + vpshufb xmm2,xmm1,xmm15
|
| + vaesenclast xmm2,xmm2,xmm0
|
| + vpslld xmm0,xmm0,1
|
| + vpsllq xmm3,xmm1,32
|
| + vpxor xmm1,xmm1,xmm3
|
| + vpshufb xmm3,xmm1,XMMWORD[con3]
|
| + vpxor xmm1,xmm1,xmm3
|
| + vpxor xmm1,xmm1,xmm2
|
| +
|
| + vaesenc xmm4,xmm4,xmm1
|
| + vmovdqa XMMWORD[48+rdx],xmm1
|
| +
|
| + vpshufb xmm2,xmm1,xmm15
|
| + vaesenclast xmm2,xmm2,xmm0
|
| + vpslld xmm0,xmm0,1
|
| + vpsllq xmm3,xmm1,32
|
| + vpxor xmm1,xmm1,xmm3
|
| + vpshufb xmm3,xmm1,XMMWORD[con3]
|
| + vpxor xmm1,xmm1,xmm3
|
| + vpxor xmm1,xmm1,xmm2
|
| +
|
| + vaesenc xmm4,xmm4,xmm1
|
| + vmovdqa XMMWORD[64+rdx],xmm1
|
| +
|
| + vpshufb xmm2,xmm1,xmm15
|
| + vaesenclast xmm2,xmm2,xmm0
|
| + vpslld xmm0,xmm0,1
|
| + vpsllq xmm3,xmm1,32
|
| + vpxor xmm1,xmm1,xmm3
|
| + vpshufb xmm3,xmm1,XMMWORD[con3]
|
| + vpxor xmm1,xmm1,xmm3
|
| + vpxor xmm1,xmm1,xmm2
|
| +
|
| + vaesenc xmm4,xmm4,xmm1
|
| + vmovdqa XMMWORD[80+rdx],xmm1
|
| +
|
| + vpshufb xmm2,xmm1,xmm15
|
| + vaesenclast xmm2,xmm2,xmm0
|
| + vpslld xmm0,xmm0,1
|
| + vpsllq xmm3,xmm1,32
|
| + vpxor xmm1,xmm1,xmm3
|
| + vpshufb xmm3,xmm1,XMMWORD[con3]
|
| + vpxor xmm1,xmm1,xmm3
|
| + vpxor xmm1,xmm1,xmm2
|
| +
|
| + vaesenc xmm4,xmm4,xmm1
|
| + vmovdqa XMMWORD[96+rdx],xmm1
|
| +
|
| + vpshufb xmm2,xmm1,xmm15
|
| + vaesenclast xmm2,xmm2,xmm0
|
| + vpslld xmm0,xmm0,1
|
| + vpsllq xmm3,xmm1,32
|
| + vpxor xmm1,xmm1,xmm3
|
| + vpshufb xmm3,xmm1,XMMWORD[con3]
|
| + vpxor xmm1,xmm1,xmm3
|
| + vpxor xmm1,xmm1,xmm2
|
| +
|
| + vaesenc xmm4,xmm4,xmm1
|
| + vmovdqa XMMWORD[112+rdx],xmm1
|
| +
|
| + vpshufb xmm2,xmm1,xmm15
|
| + vaesenclast xmm2,xmm2,xmm0
|
| + vpslld xmm0,xmm0,1
|
| + vpsllq xmm3,xmm1,32
|
| + vpxor xmm1,xmm1,xmm3
|
| + vpshufb xmm3,xmm1,XMMWORD[con3]
|
| + vpxor xmm1,xmm1,xmm3
|
| + vpxor xmm1,xmm1,xmm2
|
| +
|
| + vaesenc xmm4,xmm4,xmm1
|
| + vmovdqa XMMWORD[128+rdx],xmm1
|
| +
|
| +
|
| + vmovdqa xmm0,XMMWORD[con2]
|
| +
|
| + vpshufb xmm2,xmm1,xmm15
|
| + vaesenclast xmm2,xmm2,xmm0
|
| + vpslld xmm0,xmm0,1
|
| + vpsllq xmm3,xmm1,32
|
| + vpxor xmm1,xmm1,xmm3
|
| + vpshufb xmm3,xmm1,XMMWORD[con3]
|
| + vpxor xmm1,xmm1,xmm3
|
| + vpxor xmm1,xmm1,xmm2
|
| +
|
| + vaesenc xmm4,xmm4,xmm1
|
| + vmovdqa XMMWORD[144+rdx],xmm1
|
| +
|
| + vpshufb xmm2,xmm1,xmm15
|
| + vaesenclast xmm2,xmm2,xmm0
|
| + vpsllq xmm3,xmm1,32
|
| + vpxor xmm1,xmm1,xmm3
|
| + vpshufb xmm3,xmm1,XMMWORD[con3]
|
| + vpxor xmm1,xmm1,xmm3
|
| + vpxor xmm1,xmm1,xmm2
|
| +
|
| + vaesenclast xmm4,xmm4,xmm1
|
| + vmovdqa XMMWORD[160+rdx],xmm1
|
| +
|
| +
|
| + vmovdqa XMMWORD[rsi],xmm4
|
| + mov rdi,QWORD[8+rsp] ;WIN64 epilogue
|
| + mov rsi,QWORD[16+rsp]
|
| + DB 0F3h,0C3h ;repret
|
| +
|
| +$L$SEH_end_aes128gcmsiv_aes_ks_enc_x1:
|
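| +; aes128gcmsiv_kdf: the AES-GCM-SIV key-derivation step. Builds four
| +; consecutive counter blocks from the nonce block at rdi (shuffled, masked
| +; with and_mask, stepped by `one`), encrypts them in parallel under the
| +; expanded key at rdx, and writes the four 16-byte results to rsi.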
| +global aes128gcmsiv_kdf
|
| +
|
| +ALIGN 16
|
| +aes128gcmsiv_kdf:
|
| + mov QWORD[8+rsp],rdi ;WIN64 prologue
|
| + mov QWORD[16+rsp],rsi
|
| + mov rax,rsp
|
| +$L$SEH_begin_aes128gcmsiv_kdf:
|
| + mov rdi,rcx
|
| + mov rsi,rdx
|
| + mov rdx,r8
|
| +
|
| +
|
| +
|
| +
|
| +
|
| +
|
| +
|
| + vmovdqa xmm1,XMMWORD[rdx]
|
| + vmovdqa xmm9,XMMWORD[rdi]
|
| + vmovdqa xmm12,XMMWORD[and_mask]
|
| + vmovdqa xmm13,XMMWORD[one]
|
| + vpshufd xmm9,xmm9,0x90
|
| + vpand xmm9,xmm9,xmm12
|
| + vpaddd xmm10,xmm9,xmm13
|
| + vpaddd xmm11,xmm10,xmm13
|
| + vpaddd xmm12,xmm11,xmm13
|
| +
|
| + vpxor xmm9,xmm9,xmm1
|
| + vpxor xmm10,xmm10,xmm1
|
| + vpxor xmm11,xmm11,xmm1
|
| + vpxor xmm12,xmm12,xmm1
|
| +
|
| + vmovdqa xmm1,XMMWORD[16+rdx]
|
| + vaesenc xmm9,xmm9,xmm1
|
| + vaesenc xmm10,xmm10,xmm1
|
| + vaesenc xmm11,xmm11,xmm1
|
| + vaesenc xmm12,xmm12,xmm1
|
| +
|
| + vmovdqa xmm2,XMMWORD[32+rdx]
|
| + vaesenc xmm9,xmm9,xmm2
|
| + vaesenc xmm10,xmm10,xmm2
|
| + vaesenc xmm11,xmm11,xmm2
|
| + vaesenc xmm12,xmm12,xmm2
|
| +
|
| + vmovdqa xmm1,XMMWORD[48+rdx]
|
| + vaesenc xmm9,xmm9,xmm1
|
| + vaesenc xmm10,xmm10,xmm1
|
| + vaesenc xmm11,xmm11,xmm1
|
| + vaesenc xmm12,xmm12,xmm1
|
| +
|
| + vmovdqa xmm2,XMMWORD[64+rdx]
|
| + vaesenc xmm9,xmm9,xmm2
|
| + vaesenc xmm10,xmm10,xmm2
|
| + vaesenc xmm11,xmm11,xmm2
|
| + vaesenc xmm12,xmm12,xmm2
|
| +
|
| + vmovdqa xmm1,XMMWORD[80+rdx]
|
| + vaesenc xmm9,xmm9,xmm1
|
| + vaesenc xmm10,xmm10,xmm1
|
| + vaesenc xmm11,xmm11,xmm1
|
| + vaesenc xmm12,xmm12,xmm1
|
| +
|
| + vmovdqa xmm2,XMMWORD[96+rdx]
|
| + vaesenc xmm9,xmm9,xmm2
|
| + vaesenc xmm10,xmm10,xmm2
|
| + vaesenc xmm11,xmm11,xmm2
|
| + vaesenc xmm12,xmm12,xmm2
|
| +
|
| + vmovdqa xmm1,XMMWORD[112+rdx]
|
| + vaesenc xmm9,xmm9,xmm1
|
| + vaesenc xmm10,xmm10,xmm1
|
| + vaesenc xmm11,xmm11,xmm1
|
| + vaesenc xmm12,xmm12,xmm1
|
| +
|
| + vmovdqa xmm2,XMMWORD[128+rdx]
|
| + vaesenc xmm9,xmm9,xmm2
|
| + vaesenc xmm10,xmm10,xmm2
|
| + vaesenc xmm11,xmm11,xmm2
|
| + vaesenc xmm12,xmm12,xmm2
|
| +
|
| + vmovdqa xmm1,XMMWORD[144+rdx]
|
| + vaesenc xmm9,xmm9,xmm1
|
| + vaesenc xmm10,xmm10,xmm1
|
| + vaesenc xmm11,xmm11,xmm1
|
| + vaesenc xmm12,xmm12,xmm1
|
| +
|
| + vmovdqa xmm2,XMMWORD[160+rdx]
|
| + vaesenclast xmm9,xmm9,xmm2
|
| + vaesenclast xmm10,xmm10,xmm2
|
| + vaesenclast xmm11,xmm11,xmm2
|
| + vaesenclast xmm12,xmm12,xmm2
|
| +
|
| +
|
| + vmovdqa XMMWORD[rsi],xmm9
|
| + vmovdqa XMMWORD[16+rsi],xmm10
|
| + vmovdqa XMMWORD[32+rsi],xmm11
|
| + vmovdqa XMMWORD[48+rsi],xmm12
|
| + mov rdi,QWORD[8+rsp] ;WIN64 epilogue
|
| + mov rsi,QWORD[16+rsp]
|
| + DB 0F3h,0C3h ;repret
|
| +
|
| +$L$SEH_end_aes128gcmsiv_kdf:
|
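| +; aes128gcmsiv_enc_msg_x4: CTR encryption, four blocks per main-loop
| +; iteration. rdi = in, rsi = out, rdx = initial counter block, rcx =
| +; expanded key, r8 (fifth argument, taken from the Win64 stack) = length
| +; in bytes. OR_MASK forces the counter's top bit per the GCM-SIV counter
| +; layout; the remaining len/16 mod 4 blocks run through the tail loop.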
| +global aes128gcmsiv_enc_msg_x4
|
| +
|
| +ALIGN 16
|
| +aes128gcmsiv_enc_msg_x4:
|
| + mov QWORD[8+rsp],rdi ;WIN64 prologue
|
| + mov QWORD[16+rsp],rsi
|
| + mov rax,rsp
|
| +$L$SEH_begin_aes128gcmsiv_enc_msg_x4:
|
| + mov rdi,rcx
|
| + mov rsi,rdx
|
| + mov rdx,r8
|
| + mov rcx,r9
|
| + mov r8,QWORD[40+rsp]
|
| +
|
| +
|
| +
|
| + test r8,r8
|
| + jnz NEAR $L$128_enc_msg_x4_start
|
| + mov rdi,QWORD[8+rsp] ;WIN64 epilogue
|
| + mov rsi,QWORD[16+rsp]
|
| + DB 0F3h,0C3h ;repret
|
| +
|
| +$L$128_enc_msg_x4_start:
|
| + push r12
|
| +
|
| + push r13
|
| +
|
| +
|
| + shr r8,4
|
| + mov r10,r8
|
| + shl r10,62
|
| + shr r10,62
|
| +
|
| +
|
| + vmovdqa xmm15,XMMWORD[rdx]
|
| + vpor xmm15,xmm15,XMMWORD[OR_MASK]
|
| +
|
| + vmovdqu xmm4,XMMWORD[four]
|
| + vmovdqa xmm0,xmm15
|
| + vpaddd xmm1,xmm15,XMMWORD[one]
|
| + vpaddd xmm2,xmm15,XMMWORD[two]
|
| + vpaddd xmm3,xmm15,XMMWORD[three]
|
| +
|
| + shr r8,2
|
| + je NEAR $L$128_enc_msg_x4_check_remainder
|
| +
|
| + sub rsi,64
|
| + sub rdi,64
|
| +
|
| +$L$128_enc_msg_x4_loop1:
|
| + add rsi,64
|
| + add rdi,64
|
| +
|
| + vmovdqa xmm5,xmm0
|
| + vmovdqa xmm6,xmm1
|
| + vmovdqa xmm7,xmm2
|
| + vmovdqa xmm8,xmm3
|
| +
|
| + vpxor xmm5,xmm5,XMMWORD[rcx]
|
| + vpxor xmm6,xmm6,XMMWORD[rcx]
|
| + vpxor xmm7,xmm7,XMMWORD[rcx]
|
| + vpxor xmm8,xmm8,XMMWORD[rcx]
|
| +
|
| + vmovdqu xmm12,XMMWORD[16+rcx]
|
| + vaesenc xmm5,xmm5,xmm12
|
| + vaesenc xmm6,xmm6,xmm12
|
| + vaesenc xmm7,xmm7,xmm12
|
| + vaesenc xmm8,xmm8,xmm12
|
| +
|
| + vpaddd xmm0,xmm0,xmm4
|
| + vmovdqu xmm12,XMMWORD[32+rcx]
|
| + vaesenc xmm5,xmm5,xmm12
|
| + vaesenc xmm6,xmm6,xmm12
|
| + vaesenc xmm7,xmm7,xmm12
|
| + vaesenc xmm8,xmm8,xmm12
|
| +
|
| + vpaddd xmm1,xmm1,xmm4
|
| + vmovdqu xmm12,XMMWORD[48+rcx]
|
| + vaesenc xmm5,xmm5,xmm12
|
| + vaesenc xmm6,xmm6,xmm12
|
| + vaesenc xmm7,xmm7,xmm12
|
| + vaesenc xmm8,xmm8,xmm12
|
| +
|
| + vpaddd xmm2,xmm2,xmm4
|
| + vmovdqu xmm12,XMMWORD[64+rcx]
|
| + vaesenc xmm5,xmm5,xmm12
|
| + vaesenc xmm6,xmm6,xmm12
|
| + vaesenc xmm7,xmm7,xmm12
|
| + vaesenc xmm8,xmm8,xmm12
|
| +
|
| + vpaddd xmm3,xmm3,xmm4
|
| +
|
| + vmovdqu xmm12,XMMWORD[80+rcx]
|
| + vaesenc xmm5,xmm5,xmm12
|
| + vaesenc xmm6,xmm6,xmm12
|
| + vaesenc xmm7,xmm7,xmm12
|
| + vaesenc xmm8,xmm8,xmm12
|
| +
|
| + vmovdqu xmm12,XMMWORD[96+rcx]
|
| + vaesenc xmm5,xmm5,xmm12
|
| + vaesenc xmm6,xmm6,xmm12
|
| + vaesenc xmm7,xmm7,xmm12
|
| + vaesenc xmm8,xmm8,xmm12
|
| +
|
| + vmovdqu xmm12,XMMWORD[112+rcx]
|
| + vaesenc xmm5,xmm5,xmm12
|
| + vaesenc xmm6,xmm6,xmm12
|
| + vaesenc xmm7,xmm7,xmm12
|
| + vaesenc xmm8,xmm8,xmm12
|
| +
|
| + vmovdqu xmm12,XMMWORD[128+rcx]
|
| + vaesenc xmm5,xmm5,xmm12
|
| + vaesenc xmm6,xmm6,xmm12
|
| + vaesenc xmm7,xmm7,xmm12
|
| + vaesenc xmm8,xmm8,xmm12
|
| +
|
| + vmovdqu xmm12,XMMWORD[144+rcx]
|
| + vaesenc xmm5,xmm5,xmm12
|
| + vaesenc xmm6,xmm6,xmm12
|
| + vaesenc xmm7,xmm7,xmm12
|
| + vaesenc xmm8,xmm8,xmm12
|
| +
|
| + vmovdqu xmm12,XMMWORD[160+rcx]
|
| + vaesenclast xmm5,xmm5,xmm12
|
| + vaesenclast xmm6,xmm6,xmm12
|
| + vaesenclast xmm7,xmm7,xmm12
|
| + vaesenclast xmm8,xmm8,xmm12
|
| +
|
| +
|
| +
|
| + vpxor xmm5,xmm5,XMMWORD[rdi]
|
| + vpxor xmm6,xmm6,XMMWORD[16+rdi]
|
| + vpxor xmm7,xmm7,XMMWORD[32+rdi]
|
| + vpxor xmm8,xmm8,XMMWORD[48+rdi]
|
| +
|
| + sub r8,1
|
| +
|
| + vmovdqu XMMWORD[rsi],xmm5
|
| + vmovdqu XMMWORD[16+rsi],xmm6
|
| + vmovdqu XMMWORD[32+rsi],xmm7
|
| + vmovdqu XMMWORD[48+rsi],xmm8
|
| +
|
| + jne NEAR $L$128_enc_msg_x4_loop1
|
| +
|
| + add rsi,64
|
| + add rdi,64
|
| +
|
| +$L$128_enc_msg_x4_check_remainder:
|
| + cmp r10,0
|
| + je NEAR $L$128_enc_msg_x4_out
|
| +
|
| +$L$128_enc_msg_x4_loop2:
|
| +
|
| +
|
| + vmovdqa xmm5,xmm0
|
| + vpaddd xmm0,xmm0,XMMWORD[one]
|
| +
|
| + vpxor xmm5,xmm5,XMMWORD[rcx]
|
| + vaesenc xmm5,xmm5,XMMWORD[16+rcx]
|
| + vaesenc xmm5,xmm5,XMMWORD[32+rcx]
|
| + vaesenc xmm5,xmm5,XMMWORD[48+rcx]
|
| + vaesenc xmm5,xmm5,XMMWORD[64+rcx]
|
| + vaesenc xmm5,xmm5,XMMWORD[80+rcx]
|
| + vaesenc xmm5,xmm5,XMMWORD[96+rcx]
|
| + vaesenc xmm5,xmm5,XMMWORD[112+rcx]
|
| + vaesenc xmm5,xmm5,XMMWORD[128+rcx]
|
| + vaesenc xmm5,xmm5,XMMWORD[144+rcx]
|
| + vaesenclast xmm5,xmm5,XMMWORD[160+rcx]
|
| +
|
| +
|
| + vpxor xmm5,xmm5,XMMWORD[rdi]
|
| + vmovdqu XMMWORD[rsi],xmm5
|
| +
|
| + add rdi,16
|
| + add rsi,16
|
| +
|
| + sub r10,1
|
| + jne NEAR $L$128_enc_msg_x4_loop2
|
| +
|
| +$L$128_enc_msg_x4_out:
|
| + pop r13
|
| +
|
| + pop r12
|
| +
|
| + mov rdi,QWORD[8+rsp] ;WIN64 epilogue
|
| + mov rsi,QWORD[16+rsp]
|
| + DB 0F3h,0C3h ;repret
|
| +
|
| +$L$SEH_end_aes128gcmsiv_enc_msg_x4:
|
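| +; aes128gcmsiv_enc_msg_x8: as the x4 variant but eight blocks per
| +; iteration; the eighth counter is kept in a 64-byte-aligned stack slot
| +; (an rbp frame realigns rsp) since only xmm0-xmm15 are available.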
| +global aes128gcmsiv_enc_msg_x8
|
| +
|
| +ALIGN 16
|
| +aes128gcmsiv_enc_msg_x8:
|
| + mov QWORD[8+rsp],rdi ;WIN64 prologue
|
| + mov QWORD[16+rsp],rsi
|
| + mov rax,rsp
|
| +$L$SEH_begin_aes128gcmsiv_enc_msg_x8:
|
| + mov rdi,rcx
|
| + mov rsi,rdx
|
| + mov rdx,r8
|
| + mov rcx,r9
|
| + mov r8,QWORD[40+rsp]
|
| +
|
| +
|
| +
|
| + test r8,r8
|
| + jnz NEAR $L$128_enc_msg_x8_start
|
| + mov rdi,QWORD[8+rsp] ;WIN64 epilogue
|
| + mov rsi,QWORD[16+rsp]
|
| + DB 0F3h,0C3h ;repret
|
| +
|
| +$L$128_enc_msg_x8_start:
|
| + push r12
|
| +
|
| + push r13
|
| +
|
| + push rbp
|
| +
|
| + mov rbp,rsp
|
| +
|
| +
|
| +
|
| + sub rsp,128
|
| + and rsp,-64
|
| +
|
| + shr r8,4
|
| + mov r10,r8
|
| + shl r10,61
|
| + shr r10,61
|
| +
|
| +
|
| + vmovdqu xmm1,XMMWORD[rdx]
|
| + vpor xmm1,xmm1,XMMWORD[OR_MASK]
|
| +
|
| +
|
| + vpaddd xmm0,xmm1,XMMWORD[seven]
|
| + vmovdqu XMMWORD[rsp],xmm0
|
| + vpaddd xmm9,xmm1,XMMWORD[one]
|
| + vpaddd xmm10,xmm1,XMMWORD[two]
|
| + vpaddd xmm11,xmm1,XMMWORD[three]
|
| + vpaddd xmm12,xmm1,XMMWORD[four]
|
| + vpaddd xmm13,xmm1,XMMWORD[five]
|
| + vpaddd xmm14,xmm1,XMMWORD[six]
|
| + vmovdqa xmm0,xmm1
|
| +
|
| + shr r8,3
|
| + je NEAR $L$128_enc_msg_x8_check_remainder
|
| +
|
| + sub rsi,128
|
| + sub rdi,128
|
| +
|
| +$L$128_enc_msg_x8_loop1:
|
| + add rsi,128
|
| + add rdi,128
|
| +
|
| + vmovdqa xmm1,xmm0
|
| + vmovdqa xmm2,xmm9
|
| + vmovdqa xmm3,xmm10
|
| + vmovdqa xmm4,xmm11
|
| + vmovdqa xmm5,xmm12
|
| + vmovdqa xmm6,xmm13
|
| + vmovdqa xmm7,xmm14
|
| +
|
| + vmovdqu xmm8,XMMWORD[rsp]
|
| +
|
| + vpxor xmm1,xmm1,XMMWORD[rcx]
|
| + vpxor xmm2,xmm2,XMMWORD[rcx]
|
| + vpxor xmm3,xmm3,XMMWORD[rcx]
|
| + vpxor xmm4,xmm4,XMMWORD[rcx]
|
| + vpxor xmm5,xmm5,XMMWORD[rcx]
|
| + vpxor xmm6,xmm6,XMMWORD[rcx]
|
| + vpxor xmm7,xmm7,XMMWORD[rcx]
|
| + vpxor xmm8,xmm8,XMMWORD[rcx]
|
| +
|
| + vmovdqu xmm15,XMMWORD[16+rcx]
|
| + vaesenc xmm1,xmm1,xmm15
|
| + vaesenc xmm2,xmm2,xmm15
|
| + vaesenc xmm3,xmm3,xmm15
|
| + vaesenc xmm4,xmm4,xmm15
|
| + vaesenc xmm5,xmm5,xmm15
|
| + vaesenc xmm6,xmm6,xmm15
|
| + vaesenc xmm7,xmm7,xmm15
|
| + vaesenc xmm8,xmm8,xmm15
|
| +
|
| + vmovdqu xmm14,XMMWORD[rsp]
|
| + vpaddd xmm14,xmm14,XMMWORD[eight]
|
| + vmovdqu XMMWORD[rsp],xmm14
|
| + vmovdqu xmm15,XMMWORD[32+rcx]
|
| + vaesenc xmm1,xmm1,xmm15
|
| + vaesenc xmm2,xmm2,xmm15
|
| + vaesenc xmm3,xmm3,xmm15
|
| + vaesenc xmm4,xmm4,xmm15
|
| + vaesenc xmm5,xmm5,xmm15
|
| + vaesenc xmm6,xmm6,xmm15
|
| + vaesenc xmm7,xmm7,xmm15
|
| + vaesenc xmm8,xmm8,xmm15
|
| +
|
| + vpsubd xmm14,xmm14,XMMWORD[one]
|
| + vmovdqu xmm15,XMMWORD[48+rcx]
|
| + vaesenc xmm1,xmm1,xmm15
|
| + vaesenc xmm2,xmm2,xmm15
|
| + vaesenc xmm3,xmm3,xmm15
|
| + vaesenc xmm4,xmm4,xmm15
|
| + vaesenc xmm5,xmm5,xmm15
|
| + vaesenc xmm6,xmm6,xmm15
|
| + vaesenc xmm7,xmm7,xmm15
|
| + vaesenc xmm8,xmm8,xmm15
|
| +
|
| + vpaddd xmm0,xmm0,XMMWORD[eight]
|
| + vmovdqu xmm15,XMMWORD[64+rcx]
|
| + vaesenc xmm1,xmm1,xmm15
|
| + vaesenc xmm2,xmm2,xmm15
|
| + vaesenc xmm3,xmm3,xmm15
|
| + vaesenc xmm4,xmm4,xmm15
|
| + vaesenc xmm5,xmm5,xmm15
|
| + vaesenc xmm6,xmm6,xmm15
|
| + vaesenc xmm7,xmm7,xmm15
|
| + vaesenc xmm8,xmm8,xmm15
|
| +
|
| + vpaddd xmm9,xmm9,XMMWORD[eight]
|
| + vmovdqu xmm15,XMMWORD[80+rcx]
|
| + vaesenc xmm1,xmm1,xmm15
|
| + vaesenc xmm2,xmm2,xmm15
|
| + vaesenc xmm3,xmm3,xmm15
|
| + vaesenc xmm4,xmm4,xmm15
|
| + vaesenc xmm5,xmm5,xmm15
|
| + vaesenc xmm6,xmm6,xmm15
|
| + vaesenc xmm7,xmm7,xmm15
|
| + vaesenc xmm8,xmm8,xmm15
|
| +
|
| + vpaddd xmm10,xmm10,XMMWORD[eight]
|
| + vmovdqu xmm15,XMMWORD[96+rcx]
|
| + vaesenc xmm1,xmm1,xmm15
|
| + vaesenc xmm2,xmm2,xmm15
|
| + vaesenc xmm3,xmm3,xmm15
|
| + vaesenc xmm4,xmm4,xmm15
|
| + vaesenc xmm5,xmm5,xmm15
|
| + vaesenc xmm6,xmm6,xmm15
|
| + vaesenc xmm7,xmm7,xmm15
|
| + vaesenc xmm8,xmm8,xmm15
|
| +
|
| + vpaddd xmm11,xmm11,XMMWORD[eight]
|
| + vmovdqu xmm15,XMMWORD[112+rcx]
|
| + vaesenc xmm1,xmm1,xmm15
|
| + vaesenc xmm2,xmm2,xmm15
|
| + vaesenc xmm3,xmm3,xmm15
|
| + vaesenc xmm4,xmm4,xmm15
|
| + vaesenc xmm5,xmm5,xmm15
|
| + vaesenc xmm6,xmm6,xmm15
|
| + vaesenc xmm7,xmm7,xmm15
|
| + vaesenc xmm8,xmm8,xmm15
|
| +
|
| + vpaddd xmm12,xmm12,XMMWORD[eight]
|
| + vmovdqu xmm15,XMMWORD[128+rcx]
|
| + vaesenc xmm1,xmm1,xmm15
|
| + vaesenc xmm2,xmm2,xmm15
|
| + vaesenc xmm3,xmm3,xmm15
|
| + vaesenc xmm4,xmm4,xmm15
|
| + vaesenc xmm5,xmm5,xmm15
|
| + vaesenc xmm6,xmm6,xmm15
|
| + vaesenc xmm7,xmm7,xmm15
|
| + vaesenc xmm8,xmm8,xmm15
|
| +
|
| + vpaddd xmm13,xmm13,XMMWORD[eight]
|
| + vmovdqu xmm15,XMMWORD[144+rcx]
|
| + vaesenc xmm1,xmm1,xmm15
|
| + vaesenc xmm2,xmm2,xmm15
|
| + vaesenc xmm3,xmm3,xmm15
|
| + vaesenc xmm4,xmm4,xmm15
|
| + vaesenc xmm5,xmm5,xmm15
|
| + vaesenc xmm6,xmm6,xmm15
|
| + vaesenc xmm7,xmm7,xmm15
|
| + vaesenc xmm8,xmm8,xmm15
|
| +
|
| + vmovdqu xmm15,XMMWORD[160+rcx]
|
| + vaesenclast xmm1,xmm1,xmm15
|
| + vaesenclast xmm2,xmm2,xmm15
|
| + vaesenclast xmm3,xmm3,xmm15
|
| + vaesenclast xmm4,xmm4,xmm15
|
| + vaesenclast xmm5,xmm5,xmm15
|
| + vaesenclast xmm6,xmm6,xmm15
|
| + vaesenclast xmm7,xmm7,xmm15
|
| + vaesenclast xmm8,xmm8,xmm15
|
| +
|
| +
|
| +
|
| + vpxor xmm1,xmm1,XMMWORD[rdi]
|
| + vpxor xmm2,xmm2,XMMWORD[16+rdi]
|
| + vpxor xmm3,xmm3,XMMWORD[32+rdi]
|
| + vpxor xmm4,xmm4,XMMWORD[48+rdi]
|
| + vpxor xmm5,xmm5,XMMWORD[64+rdi]
|
| + vpxor xmm6,xmm6,XMMWORD[80+rdi]
|
| + vpxor xmm7,xmm7,XMMWORD[96+rdi]
|
| + vpxor xmm8,xmm8,XMMWORD[112+rdi]
|
| +
|
| + dec r8
|
| +
|
| + vmovdqu XMMWORD[rsi],xmm1
|
| + vmovdqu XMMWORD[16+rsi],xmm2
|
| + vmovdqu XMMWORD[32+rsi],xmm3
|
| + vmovdqu XMMWORD[48+rsi],xmm4
|
| + vmovdqu XMMWORD[64+rsi],xmm5
|
| + vmovdqu XMMWORD[80+rsi],xmm6
|
| + vmovdqu XMMWORD[96+rsi],xmm7
|
| + vmovdqu XMMWORD[112+rsi],xmm8
|
| +
|
| + jne NEAR $L$128_enc_msg_x8_loop1
|
| +
|
| + add rsi,128
|
| + add rdi,128
|
| +
|
| +$L$128_enc_msg_x8_check_remainder:
|
| + cmp r10,0
|
| + je NEAR $L$128_enc_msg_x8_out
|
| +
|
| +$L$128_enc_msg_x8_loop2:
|
| +
|
| +
|
| + vmovdqa xmm1,xmm0
|
| + vpaddd xmm0,xmm0,XMMWORD[one]
|
| +
|
| + vpxor xmm1,xmm1,XMMWORD[rcx]
|
| + vaesenc xmm1,xmm1,XMMWORD[16+rcx]
|
| + vaesenc xmm1,xmm1,XMMWORD[32+rcx]
|
| + vaesenc xmm1,xmm1,XMMWORD[48+rcx]
|
| + vaesenc xmm1,xmm1,XMMWORD[64+rcx]
|
| + vaesenc xmm1,xmm1,XMMWORD[80+rcx]
|
| + vaesenc xmm1,xmm1,XMMWORD[96+rcx]
|
| + vaesenc xmm1,xmm1,XMMWORD[112+rcx]
|
| + vaesenc xmm1,xmm1,XMMWORD[128+rcx]
|
| + vaesenc xmm1,xmm1,XMMWORD[144+rcx]
|
| + vaesenclast xmm1,xmm1,XMMWORD[160+rcx]
|
| +
|
| +
|
| + vpxor xmm1,xmm1,XMMWORD[rdi]
|
| +
|
| + vmovdqu XMMWORD[rsi],xmm1
|
| +
|
| + add rdi,16
|
| + add rsi,16
|
| +
|
| + dec r10
|
| + jne NEAR $L$128_enc_msg_x8_loop2
|
| +
|
| +$L$128_enc_msg_x8_out:
|
| + mov rsp,rbp
|
| +
|
| + pop rbp
|
| +
|
| + pop r13
|
| +
|
| + pop r12
|
| +
|
| + mov rdi,QWORD[8+rsp] ;WIN64 epilogue
|
| + mov rsi,QWORD[16+rsp]
|
| + DB 0F3h,0C3h ;repret
|
| +
|
| +$L$SEH_end_aes128gcmsiv_enc_msg_x8:
|
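| +; aes128gcmsiv_dec: CTR decryption fused with the POLYVAL update over the
| +; ciphertext. After the Win64 shuffle: rdi = ciphertext, rsi = plaintext
| +; out, rdx = 16-byte POLYVAL accumulator (in/out), rcx = H-power table,
| +; r8 = expanded key, r9 = ciphertext length in bytes. The initial counter
| +; appears to be taken from the tag that follows the ciphertext (rdi+r9).
| +; The main loop decrypts six blocks while folding the previous six into
| +; the accumulator with a deferred reduction.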
| +global aes128gcmsiv_dec
|
| +
|
| +ALIGN 16
|
| +aes128gcmsiv_dec:
|
| + mov QWORD[8+rsp],rdi ;WIN64 prologue
|
| + mov QWORD[16+rsp],rsi
|
| + mov rax,rsp
|
| +$L$SEH_begin_aes128gcmsiv_dec:
|
| + mov rdi,rcx
|
| + mov rsi,rdx
|
| + mov rdx,r8
|
| + mov rcx,r9
|
| + mov r8,QWORD[40+rsp]
|
| + mov r9,QWORD[48+rsp]
|
| +
|
| +
|
| +
|
| + test r9,~15
|
| + jnz NEAR $L$128_dec_start
|
| + mov rdi,QWORD[8+rsp] ;WIN64 epilogue
|
| + mov rsi,QWORD[16+rsp]
|
| + DB 0F3h,0C3h ;repret
|
| +
|
| +$L$128_dec_start:
|
| + vzeroupper
|
| + vmovdqa xmm0,XMMWORD[rdx]
|
| + mov rax,rdx
|
| +
|
| + lea rax,[32+rax]
|
| + lea rcx,[32+rcx]
|
| +
|
| +
|
| + vmovdqu xmm15,XMMWORD[r9*1+rdi]
|
| + vpor xmm15,xmm15,XMMWORD[OR_MASK]
|
| + and r9,~15
|
| +
|
| +
|
| + cmp r9,96
|
| + jb NEAR $L$128_dec_loop2
|
| +
|
| +
|
| + sub r9,96
|
| + vmovdqa xmm7,xmm15
|
| + vpaddd xmm8,xmm7,XMMWORD[one]
|
| + vpaddd xmm9,xmm7,XMMWORD[two]
|
| + vpaddd xmm10,xmm9,XMMWORD[one]
|
| + vpaddd xmm11,xmm9,XMMWORD[two]
|
| + vpaddd xmm12,xmm11,XMMWORD[one]
|
| + vpaddd xmm15,xmm11,XMMWORD[two]
|
| +
|
| + vpxor xmm7,xmm7,XMMWORD[r8]
|
| + vpxor xmm8,xmm8,XMMWORD[r8]
|
| + vpxor xmm9,xmm9,XMMWORD[r8]
|
| + vpxor xmm10,xmm10,XMMWORD[r8]
|
| + vpxor xmm11,xmm11,XMMWORD[r8]
|
| + vpxor xmm12,xmm12,XMMWORD[r8]
|
| +
|
| + vmovdqu xmm4,XMMWORD[16+r8]
|
| + vaesenc xmm7,xmm7,xmm4
|
| + vaesenc xmm8,xmm8,xmm4
|
| + vaesenc xmm9,xmm9,xmm4
|
| + vaesenc xmm10,xmm10,xmm4
|
| + vaesenc xmm11,xmm11,xmm4
|
| + vaesenc xmm12,xmm12,xmm4
|
| +
|
| + vmovdqu xmm4,XMMWORD[32+r8]
|
| + vaesenc xmm7,xmm7,xmm4
|
| + vaesenc xmm8,xmm8,xmm4
|
| + vaesenc xmm9,xmm9,xmm4
|
| + vaesenc xmm10,xmm10,xmm4
|
| + vaesenc xmm11,xmm11,xmm4
|
| + vaesenc xmm12,xmm12,xmm4
|
| +
|
| + vmovdqu xmm4,XMMWORD[48+r8]
|
| + vaesenc xmm7,xmm7,xmm4
|
| + vaesenc xmm8,xmm8,xmm4
|
| + vaesenc xmm9,xmm9,xmm4
|
| + vaesenc xmm10,xmm10,xmm4
|
| + vaesenc xmm11,xmm11,xmm4
|
| + vaesenc xmm12,xmm12,xmm4
|
| +
|
| + vmovdqu xmm4,XMMWORD[64+r8]
|
| + vaesenc xmm7,xmm7,xmm4
|
| + vaesenc xmm8,xmm8,xmm4
|
| + vaesenc xmm9,xmm9,xmm4
|
| + vaesenc xmm10,xmm10,xmm4
|
| + vaesenc xmm11,xmm11,xmm4
|
| + vaesenc xmm12,xmm12,xmm4
|
| +
|
| + vmovdqu xmm4,XMMWORD[80+r8]
|
| + vaesenc xmm7,xmm7,xmm4
|
| + vaesenc xmm8,xmm8,xmm4
|
| + vaesenc xmm9,xmm9,xmm4
|
| + vaesenc xmm10,xmm10,xmm4
|
| + vaesenc xmm11,xmm11,xmm4
|
| + vaesenc xmm12,xmm12,xmm4
|
| +
|
| + vmovdqu xmm4,XMMWORD[96+r8]
|
| + vaesenc xmm7,xmm7,xmm4
|
| + vaesenc xmm8,xmm8,xmm4
|
| + vaesenc xmm9,xmm9,xmm4
|
| + vaesenc xmm10,xmm10,xmm4
|
| + vaesenc xmm11,xmm11,xmm4
|
| + vaesenc xmm12,xmm12,xmm4
|
| +
|
| + vmovdqu xmm4,XMMWORD[112+r8]
|
| + vaesenc xmm7,xmm7,xmm4
|
| + vaesenc xmm8,xmm8,xmm4
|
| + vaesenc xmm9,xmm9,xmm4
|
| + vaesenc xmm10,xmm10,xmm4
|
| + vaesenc xmm11,xmm11,xmm4
|
| + vaesenc xmm12,xmm12,xmm4
|
| +
|
| + vmovdqu xmm4,XMMWORD[128+r8]
|
| + vaesenc xmm7,xmm7,xmm4
|
| + vaesenc xmm8,xmm8,xmm4
|
| + vaesenc xmm9,xmm9,xmm4
|
| + vaesenc xmm10,xmm10,xmm4
|
| + vaesenc xmm11,xmm11,xmm4
|
| + vaesenc xmm12,xmm12,xmm4
|
| +
|
| + vmovdqu xmm4,XMMWORD[144+r8]
|
| + vaesenc xmm7,xmm7,xmm4
|
| + vaesenc xmm8,xmm8,xmm4
|
| + vaesenc xmm9,xmm9,xmm4
|
| + vaesenc xmm10,xmm10,xmm4
|
| + vaesenc xmm11,xmm11,xmm4
|
| + vaesenc xmm12,xmm12,xmm4
|
| +
|
| + vmovdqu xmm4,XMMWORD[160+r8]
|
| + vaesenclast xmm7,xmm7,xmm4
|
| + vaesenclast xmm8,xmm8,xmm4
|
| + vaesenclast xmm9,xmm9,xmm4
|
| + vaesenclast xmm10,xmm10,xmm4
|
| + vaesenclast xmm11,xmm11,xmm4
|
| + vaesenclast xmm12,xmm12,xmm4
|
| +
|
| +
|
| + vpxor xmm7,xmm7,XMMWORD[rdi]
|
| + vpxor xmm8,xmm8,XMMWORD[16+rdi]
|
| + vpxor xmm9,xmm9,XMMWORD[32+rdi]
|
| + vpxor xmm10,xmm10,XMMWORD[48+rdi]
|
| + vpxor xmm11,xmm11,XMMWORD[64+rdi]
|
| + vpxor xmm12,xmm12,XMMWORD[80+rdi]
|
| +
|
| + vmovdqu XMMWORD[rsi],xmm7
|
| + vmovdqu XMMWORD[16+rsi],xmm8
|
| + vmovdqu XMMWORD[32+rsi],xmm9
|
| + vmovdqu XMMWORD[48+rsi],xmm10
|
| + vmovdqu XMMWORD[64+rsi],xmm11
|
| + vmovdqu XMMWORD[80+rsi],xmm12
|
| +
|
| + add rdi,96
|
| + add rsi,96
|
| + jmp NEAR $L$128_dec_loop1
|
| +
|
| +
|
| +ALIGN 64
|
| +$L$128_dec_loop1:
|
| + cmp r9,96
|
| + jb NEAR $L$128_dec_finish_96
|
| + sub r9,96
|
| +
|
| + vmovdqa xmm6,xmm12
|
| + vmovdqa XMMWORD[(16-32)+rax],xmm11
|
| + vmovdqa XMMWORD[(32-32)+rax],xmm10
|
| + vmovdqa XMMWORD[(48-32)+rax],xmm9
|
| + vmovdqa XMMWORD[(64-32)+rax],xmm8
|
| + vmovdqa XMMWORD[(80-32)+rax],xmm7
|
| +
|
| + vmovdqa xmm7,xmm15
|
| + vpaddd xmm8,xmm7,XMMWORD[one]
|
| + vpaddd xmm9,xmm7,XMMWORD[two]
|
| + vpaddd xmm10,xmm9,XMMWORD[one]
|
| + vpaddd xmm11,xmm9,XMMWORD[two]
|
| + vpaddd xmm12,xmm11,XMMWORD[one]
|
| + vpaddd xmm15,xmm11,XMMWORD[two]
|
| +
|
| + vmovdqa xmm4,XMMWORD[r8]
|
| + vpxor xmm7,xmm7,xmm4
|
| + vpxor xmm8,xmm8,xmm4
|
| + vpxor xmm9,xmm9,xmm4
|
| + vpxor xmm10,xmm10,xmm4
|
| + vpxor xmm11,xmm11,xmm4
|
| + vpxor xmm12,xmm12,xmm4
|
| +
|
| + vmovdqu xmm4,XMMWORD[((0-32))+rcx]
|
| + vpclmulqdq xmm2,xmm6,xmm4,0x11
|
| + vpclmulqdq xmm3,xmm6,xmm4,0x00
|
| + vpclmulqdq xmm1,xmm6,xmm4,0x01
|
| + vpclmulqdq xmm4,xmm6,xmm4,0x10
|
| + vpxor xmm1,xmm1,xmm4
|
| +
|
| + vmovdqu xmm4,XMMWORD[16+r8]
|
| + vaesenc xmm7,xmm7,xmm4
|
| + vaesenc xmm8,xmm8,xmm4
|
| + vaesenc xmm9,xmm9,xmm4
|
| + vaesenc xmm10,xmm10,xmm4
|
| + vaesenc xmm11,xmm11,xmm4
|
| + vaesenc xmm12,xmm12,xmm4
|
| +
|
| + vmovdqu xmm6,XMMWORD[((-16))+rax]
|
| + vmovdqu xmm13,XMMWORD[((-16))+rcx]
|
| +
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x10
|
| + vpxor xmm1,xmm1,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x11
|
| + vpxor xmm2,xmm2,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x00
|
| + vpxor xmm3,xmm3,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x01
|
| + vpxor xmm1,xmm1,xmm4
|
| +
|
| +
|
| + vmovdqu xmm4,XMMWORD[32+r8]
|
| + vaesenc xmm7,xmm7,xmm4
|
| + vaesenc xmm8,xmm8,xmm4
|
| + vaesenc xmm9,xmm9,xmm4
|
| + vaesenc xmm10,xmm10,xmm4
|
| + vaesenc xmm11,xmm11,xmm4
|
| + vaesenc xmm12,xmm12,xmm4
|
| +
|
| + vmovdqu xmm6,XMMWORD[rax]
|
| + vmovdqu xmm13,XMMWORD[rcx]
|
| +
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x10
|
| + vpxor xmm1,xmm1,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x11
|
| + vpxor xmm2,xmm2,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x00
|
| + vpxor xmm3,xmm3,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x01
|
| + vpxor xmm1,xmm1,xmm4
|
| +
|
| +
|
| + vmovdqu xmm4,XMMWORD[48+r8]
|
| + vaesenc xmm7,xmm7,xmm4
|
| + vaesenc xmm8,xmm8,xmm4
|
| + vaesenc xmm9,xmm9,xmm4
|
| + vaesenc xmm10,xmm10,xmm4
|
| + vaesenc xmm11,xmm11,xmm4
|
| + vaesenc xmm12,xmm12,xmm4
|
| +
|
| + vmovdqu xmm6,XMMWORD[16+rax]
|
| + vmovdqu xmm13,XMMWORD[16+rcx]
|
| +
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x10
|
| + vpxor xmm1,xmm1,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x11
|
| + vpxor xmm2,xmm2,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x00
|
| + vpxor xmm3,xmm3,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x01
|
| + vpxor xmm1,xmm1,xmm4
|
| +
|
| +
|
| + vmovdqu xmm4,XMMWORD[64+r8]
|
| + vaesenc xmm7,xmm7,xmm4
|
| + vaesenc xmm8,xmm8,xmm4
|
| + vaesenc xmm9,xmm9,xmm4
|
| + vaesenc xmm10,xmm10,xmm4
|
| + vaesenc xmm11,xmm11,xmm4
|
| + vaesenc xmm12,xmm12,xmm4
|
| +
|
| + vmovdqu xmm6,XMMWORD[32+rax]
|
| + vmovdqu xmm13,XMMWORD[32+rcx]
|
| +
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x10
|
| + vpxor xmm1,xmm1,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x11
|
| + vpxor xmm2,xmm2,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x00
|
| + vpxor xmm3,xmm3,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x01
|
| + vpxor xmm1,xmm1,xmm4
|
| +
|
| +
|
| + vmovdqu xmm4,XMMWORD[80+r8]
|
| + vaesenc xmm7,xmm7,xmm4
|
| + vaesenc xmm8,xmm8,xmm4
|
| + vaesenc xmm9,xmm9,xmm4
|
| + vaesenc xmm10,xmm10,xmm4
|
| + vaesenc xmm11,xmm11,xmm4
|
| + vaesenc xmm12,xmm12,xmm4
|
| +
|
| + vmovdqu xmm4,XMMWORD[96+r8]
|
| + vaesenc xmm7,xmm7,xmm4
|
| + vaesenc xmm8,xmm8,xmm4
|
| + vaesenc xmm9,xmm9,xmm4
|
| + vaesenc xmm10,xmm10,xmm4
|
| + vaesenc xmm11,xmm11,xmm4
|
| + vaesenc xmm12,xmm12,xmm4
|
| +
|
| + vmovdqu xmm4,XMMWORD[112+r8]
|
| + vaesenc xmm7,xmm7,xmm4
|
| + vaesenc xmm8,xmm8,xmm4
|
| + vaesenc xmm9,xmm9,xmm4
|
| + vaesenc xmm10,xmm10,xmm4
|
| + vaesenc xmm11,xmm11,xmm4
|
| + vaesenc xmm12,xmm12,xmm4
|
| +
|
| +
|
| + vmovdqa xmm6,XMMWORD[((80-32))+rax]
|
| + vpxor xmm6,xmm6,xmm0
|
| + vmovdqu xmm5,XMMWORD[((80-32))+rcx]
|
| +
|
| + vpclmulqdq xmm4,xmm6,xmm5,0x01
|
| + vpxor xmm1,xmm1,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm5,0x11
|
| + vpxor xmm2,xmm2,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm5,0x00
|
| + vpxor xmm3,xmm3,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm5,0x10
|
| + vpxor xmm1,xmm1,xmm4
|
| +
|
| + vmovdqu xmm4,XMMWORD[128+r8]
|
| + vaesenc xmm7,xmm7,xmm4
|
| + vaesenc xmm8,xmm8,xmm4
|
| + vaesenc xmm9,xmm9,xmm4
|
| + vaesenc xmm10,xmm10,xmm4
|
| + vaesenc xmm11,xmm11,xmm4
|
| + vaesenc xmm12,xmm12,xmm4
|
| +
|
| +
|
| + vpsrldq xmm4,xmm1,8
|
| + vpxor xmm5,xmm2,xmm4
|
| + vpslldq xmm4,xmm1,8
|
| + vpxor xmm0,xmm3,xmm4
|
| +
|
| + vmovdqa xmm3,XMMWORD[poly]
|
| +
|
| + vmovdqu xmm4,XMMWORD[144+r8]
|
| + vaesenc xmm7,xmm7,xmm4
|
| + vaesenc xmm8,xmm8,xmm4
|
| + vaesenc xmm9,xmm9,xmm4
|
| + vaesenc xmm10,xmm10,xmm4
|
| + vaesenc xmm11,xmm11,xmm4
|
| + vaesenc xmm12,xmm12,xmm4
|
| +
|
| + vmovdqu xmm6,XMMWORD[160+r8]
|
| + vpalignr xmm2,xmm0,xmm0,8
|
| + vpclmulqdq xmm0,xmm0,xmm3,0x10
|
| + vpxor xmm0,xmm2,xmm0
|
| +
|
| + vpxor xmm4,xmm6,XMMWORD[rdi]
|
| + vaesenclast xmm7,xmm7,xmm4
|
| + vpxor xmm4,xmm6,XMMWORD[16+rdi]
|
| + vaesenclast xmm8,xmm8,xmm4
|
| + vpxor xmm4,xmm6,XMMWORD[32+rdi]
|
| + vaesenclast xmm9,xmm9,xmm4
|
| + vpxor xmm4,xmm6,XMMWORD[48+rdi]
|
| + vaesenclast xmm10,xmm10,xmm4
|
| + vpxor xmm4,xmm6,XMMWORD[64+rdi]
|
| + vaesenclast xmm11,xmm11,xmm4
|
| + vpxor xmm4,xmm6,XMMWORD[80+rdi]
|
| + vaesenclast xmm12,xmm12,xmm4
|
| +
|
| + vpalignr xmm2,xmm0,xmm0,8
|
| + vpclmulqdq xmm0,xmm0,xmm3,0x10
|
| + vpxor xmm0,xmm2,xmm0
|
| +
|
| + vmovdqu XMMWORD[rsi],xmm7
|
| + vmovdqu XMMWORD[16+rsi],xmm8
|
| + vmovdqu XMMWORD[32+rsi],xmm9
|
| + vmovdqu XMMWORD[48+rsi],xmm10
|
| + vmovdqu XMMWORD[64+rsi],xmm11
|
| + vmovdqu XMMWORD[80+rsi],xmm12
|
| +
|
| + vpxor xmm0,xmm0,xmm5
|
| +
|
| + lea rdi,[96+rdi]
|
| + lea rsi,[96+rsi]
|
| + jmp NEAR $L$128_dec_loop1
|
| +
|
| +$L$128_dec_finish_96:
|
| + vmovdqa xmm6,xmm12
|
| + vmovdqa XMMWORD[(16-32)+rax],xmm11
|
| + vmovdqa XMMWORD[(32-32)+rax],xmm10
|
| + vmovdqa XMMWORD[(48-32)+rax],xmm9
|
| + vmovdqa XMMWORD[(64-32)+rax],xmm8
|
| + vmovdqa XMMWORD[(80-32)+rax],xmm7
|
| +
|
| + vmovdqu xmm4,XMMWORD[((0-32))+rcx]
|
| + vpclmulqdq xmm1,xmm6,xmm4,0x10
|
| + vpclmulqdq xmm2,xmm6,xmm4,0x11
|
| + vpclmulqdq xmm3,xmm6,xmm4,0x00
|
| + vpclmulqdq xmm4,xmm6,xmm4,0x01
|
| + vpxor xmm1,xmm1,xmm4
|
| +
|
| + vmovdqu xmm6,XMMWORD[((-16))+rax]
|
| + vmovdqu xmm13,XMMWORD[((-16))+rcx]
|
| +
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x10
|
| + vpxor xmm1,xmm1,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x11
|
| + vpxor xmm2,xmm2,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x00
|
| + vpxor xmm3,xmm3,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x01
|
| + vpxor xmm1,xmm1,xmm4
|
| +
|
| + vmovdqu xmm6,XMMWORD[rax]
|
| + vmovdqu xmm13,XMMWORD[rcx]
|
| +
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x10
|
| + vpxor xmm1,xmm1,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x11
|
| + vpxor xmm2,xmm2,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x00
|
| + vpxor xmm3,xmm3,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x01
|
| + vpxor xmm1,xmm1,xmm4
|
| +
|
| + vmovdqu xmm6,XMMWORD[16+rax]
|
| + vmovdqu xmm13,XMMWORD[16+rcx]
|
| +
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x10
|
| + vpxor xmm1,xmm1,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x11
|
| + vpxor xmm2,xmm2,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x00
|
| + vpxor xmm3,xmm3,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x01
|
| + vpxor xmm1,xmm1,xmm4
|
| +
|
| + vmovdqu xmm6,XMMWORD[32+rax]
|
| + vmovdqu xmm13,XMMWORD[32+rcx]
|
| +
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x10
|
| + vpxor xmm1,xmm1,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x11
|
| + vpxor xmm2,xmm2,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x00
|
| + vpxor xmm3,xmm3,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x01
|
| + vpxor xmm1,xmm1,xmm4
|
| +
|
| +
|
| + vmovdqu xmm6,XMMWORD[((80-32))+rax]
|
| + vpxor xmm6,xmm6,xmm0
|
| + vmovdqu xmm5,XMMWORD[((80-32))+rcx]
|
| + vpclmulqdq xmm4,xmm6,xmm5,0x11
|
| + vpxor xmm2,xmm2,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm5,0x00
|
| + vpxor xmm3,xmm3,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm5,0x10
|
| + vpxor xmm1,xmm1,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm5,0x01
|
| + vpxor xmm1,xmm1,xmm4
|
| +
|
| + vpsrldq xmm4,xmm1,8
|
| + vpxor xmm5,xmm2,xmm4
|
| + vpslldq xmm4,xmm1,8
|
| + vpxor xmm0,xmm3,xmm4
|
| +
|
| + vmovdqa xmm3,XMMWORD[poly]
|
| +
|
| + vpalignr xmm2,xmm0,xmm0,8
|
| + vpclmulqdq xmm0,xmm0,xmm3,0x10
|
| + vpxor xmm0,xmm2,xmm0
|
| +
|
| + vpalignr xmm2,xmm0,xmm0,8
|
| + vpclmulqdq xmm0,xmm0,xmm3,0x10
|
| + vpxor xmm0,xmm2,xmm0
|
| +
|
| + vpxor xmm0,xmm0,xmm5
|
| +
|
| +$L$128_dec_loop2:
|
| +
|
| +
|
| +
|
| + cmp r9,16
|
| + jb NEAR $L$128_dec_out
|
| + sub r9,16
|
| +
|
| + vmovdqa xmm2,xmm15
|
| + vpaddd xmm15,xmm15,XMMWORD[one]
|
| +
|
| + vpxor xmm2,xmm2,XMMWORD[r8]
|
| + vaesenc xmm2,xmm2,XMMWORD[16+r8]
|
| + vaesenc xmm2,xmm2,XMMWORD[32+r8]
|
| + vaesenc xmm2,xmm2,XMMWORD[48+r8]
|
| + vaesenc xmm2,xmm2,XMMWORD[64+r8]
|
| + vaesenc xmm2,xmm2,XMMWORD[80+r8]
|
| + vaesenc xmm2,xmm2,XMMWORD[96+r8]
|
| + vaesenc xmm2,xmm2,XMMWORD[112+r8]
|
| + vaesenc xmm2,xmm2,XMMWORD[128+r8]
|
| + vaesenc xmm2,xmm2,XMMWORD[144+r8]
|
| + vaesenclast xmm2,xmm2,XMMWORD[160+r8]
|
| + vpxor xmm2,xmm2,XMMWORD[rdi]
|
| + vmovdqu XMMWORD[rsi],xmm2
|
| + add rdi,16
|
| + add rsi,16
|
| +
|
| + vpxor xmm0,xmm0,xmm2
|
| + vmovdqa xmm1,XMMWORD[((-32))+rcx]
|
| + call GFMUL
|
| +
|
| + jmp NEAR $L$128_dec_loop2
|
| +
|
| +$L$128_dec_out:
|
| + vmovdqu XMMWORD[rdx],xmm0
|
| + mov rdi,QWORD[8+rsp] ;WIN64 epilogue
|
| + mov rsi,QWORD[16+rsp]
|
| + DB 0F3h,0C3h ;repret
|
| +
|
| +$L$SEH_end_aes128gcmsiv_dec:
|
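| +; aes128gcmsiv_ecb_enc_block: encrypt a single block. rdi = in, rsi = out,
| +; rdx = expanded AES-128 key (11 round keys).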
| +global aes128gcmsiv_ecb_enc_block
|
| +
|
| +ALIGN 16
|
| +aes128gcmsiv_ecb_enc_block:
|
| + mov QWORD[8+rsp],rdi ;WIN64 prologue
|
| + mov QWORD[16+rsp],rsi
|
| + mov rax,rsp
|
| +$L$SEH_begin_aes128gcmsiv_ecb_enc_block:
|
| + mov rdi,rcx
|
| + mov rsi,rdx
|
| + mov rdx,r8
|
| +
|
| +
|
| +
|
| + vmovdqa xmm1,XMMWORD[rdi]
|
| +
|
| + vpxor xmm1,xmm1,XMMWORD[rdx]
|
| + vaesenc xmm1,xmm1,XMMWORD[16+rdx]
|
| + vaesenc xmm1,xmm1,XMMWORD[32+rdx]
|
| + vaesenc xmm1,xmm1,XMMWORD[48+rdx]
|
| + vaesenc xmm1,xmm1,XMMWORD[64+rdx]
|
| + vaesenc xmm1,xmm1,XMMWORD[80+rdx]
|
| + vaesenc xmm1,xmm1,XMMWORD[96+rdx]
|
| + vaesenc xmm1,xmm1,XMMWORD[112+rdx]
|
| + vaesenc xmm1,xmm1,XMMWORD[128+rdx]
|
| + vaesenc xmm1,xmm1,XMMWORD[144+rdx]
|
| + vaesenclast xmm1,xmm1,XMMWORD[160+rdx]
|
| +
|
| + vmovdqa XMMWORD[rsi],xmm1
|
| +
|
| + mov rdi,QWORD[8+rsp] ;WIN64 epilogue
|
| + mov rsi,QWORD[16+rsp]
|
| + DB 0F3h,0C3h ;repret
|
| +
|
| +$L$SEH_end_aes128gcmsiv_ecb_enc_block:
|
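| +; aes256gcmsiv_aes_ks_enc_x1: AES-256 key schedule fused with one block
| +; encryption. rdi = input block, rsi = output block, rdx = expanded-key
| +; output (15 round keys), rcx = 32-byte key.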
| +global aes256gcmsiv_aes_ks_enc_x1
|
| +
|
| +ALIGN 16
|
| +aes256gcmsiv_aes_ks_enc_x1:
|
| + mov QWORD[8+rsp],rdi ;WIN64 prologue
|
| + mov QWORD[16+rsp],rsi
|
| + mov rax,rsp
|
| +$L$SEH_begin_aes256gcmsiv_aes_ks_enc_x1:
|
| + mov rdi,rcx
|
| + mov rsi,rdx
|
| + mov rdx,r8
|
| + mov rcx,r9
|
| +
|
| +
|
| +
|
| + vmovdqa xmm0,XMMWORD[con1]
|
| + vmovdqa xmm15,XMMWORD[mask]
|
| + vmovdqa xmm8,XMMWORD[rdi]
|
| + vmovdqa xmm1,XMMWORD[rcx]
|
| + vmovdqa xmm3,XMMWORD[16+rcx]
|
| + vpxor xmm8,xmm8,xmm1
|
| + vaesenc xmm8,xmm8,xmm3
|
| + vmovdqu XMMWORD[rdx],xmm1
|
| + vmovdqu XMMWORD[16+rdx],xmm3
|
| + vpxor xmm14,xmm14,xmm14
|
| +
|
| + vpshufb xmm2,xmm3,xmm15
|
| + vaesenclast xmm2,xmm2,xmm0
|
| + vpslld xmm0,xmm0,1
|
| + vpslldq xmm4,xmm1,4
|
| + vpxor xmm1,xmm1,xmm4
|
| + vpslldq xmm4,xmm4,4
|
| + vpxor xmm1,xmm1,xmm4
|
| + vpslldq xmm4,xmm4,4
|
| + vpxor xmm1,xmm1,xmm4
|
| + vpxor xmm1,xmm1,xmm2
|
| + vaesenc xmm8,xmm8,xmm1
|
| + vmovdqu XMMWORD[32+rdx],xmm1
|
| +
|
| + vpshufd xmm2,xmm1,0xff
|
| + vaesenclast xmm2,xmm2,xmm14
|
| + vpslldq xmm4,xmm3,4
|
| + vpxor xmm3,xmm3,xmm4
|
| + vpslldq xmm4,xmm4,4
|
| + vpxor xmm3,xmm3,xmm4
|
| + vpslldq xmm4,xmm4,4
|
| + vpxor xmm3,xmm3,xmm4
|
| + vpxor xmm3,xmm3,xmm2
|
| + vaesenc xmm8,xmm8,xmm3
|
| + vmovdqu XMMWORD[48+rdx],xmm3
|
| +
|
| + vpshufb xmm2,xmm3,xmm15
|
| + vaesenclast xmm2,xmm2,xmm0
|
| + vpslld xmm0,xmm0,1
|
| + vpslldq xmm4,xmm1,4
|
| + vpxor xmm1,xmm1,xmm4
|
| + vpslldq xmm4,xmm4,4
|
| + vpxor xmm1,xmm1,xmm4
|
| + vpslldq xmm4,xmm4,4
|
| + vpxor xmm1,xmm1,xmm4
|
| + vpxor xmm1,xmm1,xmm2
|
| + vaesenc xmm8,xmm8,xmm1
|
| + vmovdqu XMMWORD[64+rdx],xmm1
|
| +
|
| + vpshufd xmm2,xmm1,0xff
|
| + vaesenclast xmm2,xmm2,xmm14
|
| + vpslldq xmm4,xmm3,4
|
| + vpxor xmm3,xmm3,xmm4
|
| + vpslldq xmm4,xmm4,4
|
| + vpxor xmm3,xmm3,xmm4
|
| + vpslldq xmm4,xmm4,4
|
| + vpxor xmm3,xmm3,xmm4
|
| + vpxor xmm3,xmm3,xmm2
|
| + vaesenc xmm8,xmm8,xmm3
|
| + vmovdqu XMMWORD[80+rdx],xmm3
|
| +
|
| + vpshufb xmm2,xmm3,xmm15
|
| + vaesenclast xmm2,xmm2,xmm0
|
| + vpslld xmm0,xmm0,1
|
| + vpslldq xmm4,xmm1,4
|
| + vpxor xmm1,xmm1,xmm4
|
| + vpslldq xmm4,xmm4,4
|
| + vpxor xmm1,xmm1,xmm4
|
| + vpslldq xmm4,xmm4,4
|
| + vpxor xmm1,xmm1,xmm4
|
| + vpxor xmm1,xmm1,xmm2
|
| + vaesenc xmm8,xmm8,xmm1
|
| + vmovdqu XMMWORD[96+rdx],xmm1
|
| +
|
| + vpshufd xmm2,xmm1,0xff
|
| + vaesenclast xmm2,xmm2,xmm14
|
| + vpslldq xmm4,xmm3,4
|
| + vpxor xmm3,xmm3,xmm4
|
| + vpslldq xmm4,xmm4,4
|
| + vpxor xmm3,xmm3,xmm4
|
| + vpslldq xmm4,xmm4,4
|
| + vpxor xmm3,xmm3,xmm4
|
| + vpxor xmm3,xmm3,xmm2
|
| + vaesenc xmm8,xmm8,xmm3
|
| + vmovdqu XMMWORD[112+rdx],xmm3
|
| +
|
| + vpshufb xmm2,xmm3,xmm15
|
| + vaesenclast xmm2,xmm2,xmm0
|
| + vpslld xmm0,xmm0,1
|
| + vpslldq xmm4,xmm1,4
|
| + vpxor xmm1,xmm1,xmm4
|
| + vpslldq xmm4,xmm4,4
|
| + vpxor xmm1,xmm1,xmm4
|
| + vpslldq xmm4,xmm4,4
|
| + vpxor xmm1,xmm1,xmm4
|
| + vpxor xmm1,xmm1,xmm2
|
| + vaesenc xmm8,xmm8,xmm1
|
| + vmovdqu XMMWORD[128+rdx],xmm1
|
| +
|
| + vpshufd xmm2,xmm1,0xff
|
| + vaesenclast xmm2,xmm2,xmm14
|
| + vpslldq xmm4,xmm3,4
|
| + vpxor xmm3,xmm3,xmm4
|
| + vpslldq xmm4,xmm4,4
|
| + vpxor xmm3,xmm3,xmm4
|
| + vpslldq xmm4,xmm4,4
|
| + vpxor xmm3,xmm3,xmm4
|
| + vpxor xmm3,xmm3,xmm2
|
| + vaesenc xmm8,xmm8,xmm3
|
| + vmovdqu XMMWORD[144+rdx],xmm3
|
| +
|
| + vpshufb xmm2,xmm3,xmm15
|
| + vaesenclast xmm2,xmm2,xmm0
|
| + vpslld xmm0,xmm0,1
|
| + vpslldq xmm4,xmm1,4
|
| + vpxor xmm1,xmm1,xmm4
|
| + vpslldq xmm4,xmm4,4
|
| + vpxor xmm1,xmm1,xmm4
|
| + vpslldq xmm4,xmm4,4
|
| + vpxor xmm1,xmm1,xmm4
|
| + vpxor xmm1,xmm1,xmm2
|
| + vaesenc xmm8,xmm8,xmm1
|
| + vmovdqu XMMWORD[160+rdx],xmm1
|
| +
|
| + vpshufd xmm2,xmm1,0xff
|
| + vaesenclast xmm2,xmm2,xmm14
|
| + vpslldq xmm4,xmm3,4
|
| + vpxor xmm3,xmm3,xmm4
|
| + vpslldq xmm4,xmm4,4
|
| + vpxor xmm3,xmm3,xmm4
|
| + vpslldq xmm4,xmm4,4
|
| + vpxor xmm3,xmm3,xmm4
|
| + vpxor xmm3,xmm3,xmm2
|
| + vaesenc xmm8,xmm8,xmm3
|
| + vmovdqu XMMWORD[176+rdx],xmm3
|
| +
|
| + vpshufb xmm2,xmm3,xmm15
|
| + vaesenclast xmm2,xmm2,xmm0
|
| + vpslld xmm0,xmm0,1
|
| + vpslldq xmm4,xmm1,4
|
| + vpxor xmm1,xmm1,xmm4
|
| + vpslldq xmm4,xmm4,4
|
| + vpxor xmm1,xmm1,xmm4
|
| + vpslldq xmm4,xmm4,4
|
| + vpxor xmm1,xmm1,xmm4
|
| + vpxor xmm1,xmm1,xmm2
|
| + vaesenc xmm8,xmm8,xmm1
|
| + vmovdqu XMMWORD[192+rdx],xmm1
|
| +
|
| + vpshufd xmm2,xmm1,0xff
|
| + vaesenclast xmm2,xmm2,xmm14
|
| + vpslldq xmm4,xmm3,4
|
| + vpxor xmm3,xmm3,xmm4
|
| + vpslldq xmm4,xmm4,4
|
| + vpxor xmm3,xmm3,xmm4
|
| + vpslldq xmm4,xmm4,4
|
| + vpxor xmm3,xmm3,xmm4
|
| + vpxor xmm3,xmm3,xmm2
|
| + vaesenc xmm8,xmm8,xmm3
|
| + vmovdqu XMMWORD[208+rdx],xmm3
|
| +
|
| + vpshufb xmm2,xmm3,xmm15
|
| + vaesenclast xmm2,xmm2,xmm0
|
| + vpslldq xmm4,xmm1,4
|
| + vpxor xmm1,xmm1,xmm4
|
| + vpslldq xmm4,xmm4,4
|
| + vpxor xmm1,xmm1,xmm4
|
| + vpslldq xmm4,xmm4,4
|
| + vpxor xmm1,xmm1,xmm4
|
| + vpxor xmm1,xmm1,xmm2
|
| + vaesenclast xmm8,xmm8,xmm1
|
| + vmovdqu XMMWORD[224+rdx],xmm1
|
| +
|
| + vmovdqa XMMWORD[rsi],xmm8
|
| + mov rdi,QWORD[8+rsp] ;WIN64 epilogue
|
| + mov rsi,QWORD[16+rsp]
|
| + DB 0F3h,0C3h ;repret
|
| +
|
| +$L$SEH_end_aes256gcmsiv_aes_ks_enc_x1:
|
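| +; aes256gcmsiv_ecb_enc_block: one-block AES-256 encryption; same argument
| +; layout as the 128-bit version, with the 15 round keys read from rdx.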
| +global aes256gcmsiv_ecb_enc_block
|
| +
|
| +ALIGN 16
|
| +aes256gcmsiv_ecb_enc_block:
|
| + mov QWORD[8+rsp],rdi ;WIN64 prologue
|
| + mov QWORD[16+rsp],rsi
|
| + mov rax,rsp
|
| +$L$SEH_begin_aes256gcmsiv_ecb_enc_block:
|
| + mov rdi,rcx
|
| + mov rsi,rdx
|
| + mov rdx,r8
|
| +
|
| +
|
| +
|
| + vmovdqa xmm1,XMMWORD[rdi]
|
| + vpxor xmm1,xmm1,XMMWORD[rdx]
|
| + vaesenc xmm1,xmm1,XMMWORD[16+rdx]
|
| + vaesenc xmm1,xmm1,XMMWORD[32+rdx]
|
| + vaesenc xmm1,xmm1,XMMWORD[48+rdx]
|
| + vaesenc xmm1,xmm1,XMMWORD[64+rdx]
|
| + vaesenc xmm1,xmm1,XMMWORD[80+rdx]
|
| + vaesenc xmm1,xmm1,XMMWORD[96+rdx]
|
| + vaesenc xmm1,xmm1,XMMWORD[112+rdx]
|
| + vaesenc xmm1,xmm1,XMMWORD[128+rdx]
|
| + vaesenc xmm1,xmm1,XMMWORD[144+rdx]
|
| + vaesenc xmm1,xmm1,XMMWORD[160+rdx]
|
| + vaesenc xmm1,xmm1,XMMWORD[176+rdx]
|
| + vaesenc xmm1,xmm1,XMMWORD[192+rdx]
|
| + vaesenc xmm1,xmm1,XMMWORD[208+rdx]
|
| + vaesenclast xmm1,xmm1,XMMWORD[224+rdx]
|
| + vmovdqa XMMWORD[rsi],xmm1
|
| + mov rdi,QWORD[8+rsp] ;WIN64 epilogue
|
| + mov rsi,QWORD[16+rsp]
|
| + DB 0F3h,0C3h ;repret
|
| +
|
| +$L$SEH_end_aes256gcmsiv_ecb_enc_block:
|
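| +; aes256gcmsiv_enc_msg_x4: four-wide CTR encryption under AES-256;
| +; arguments match aes128gcmsiv_enc_msg_x4. Unlike the 128-bit variant,
| +; the block count is rounded up here, so a partial final 16-byte block is
| +; still encrypted when the length is not a multiple of 16.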
| +global aes256gcmsiv_enc_msg_x4
|
| +
|
| +ALIGN 16
|
| +aes256gcmsiv_enc_msg_x4:
|
| + mov QWORD[8+rsp],rdi ;WIN64 prologue
|
| + mov QWORD[16+rsp],rsi
|
| + mov rax,rsp
|
| +$L$SEH_begin_aes256gcmsiv_enc_msg_x4:
|
| + mov rdi,rcx
|
| + mov rsi,rdx
|
| + mov rdx,r8
|
| + mov rcx,r9
|
| + mov r8,QWORD[40+rsp]
|
| +
|
| +
|
| +
|
| + test r8,r8
|
| + jnz NEAR $L$256_enc_msg_x4_start
|
| + mov rdi,QWORD[8+rsp] ;WIN64 epilogue
|
| + mov rsi,QWORD[16+rsp]
|
| + DB 0F3h,0C3h ;repret
|
| +
|
| +$L$256_enc_msg_x4_start:
|
| + mov r10,r8
|
| + shr r8,4
|
| + shl r10,60
|
| + jz NEAR $L$256_enc_msg_x4_start2
|
| + add r8,1
|
| +
|
| +$L$256_enc_msg_x4_start2:
|
| + mov r10,r8
|
| + shl r10,62
|
| + shr r10,62
|
| +
|
| +
|
| + vmovdqa xmm15,XMMWORD[rdx]
|
| + vpor xmm15,xmm15,XMMWORD[OR_MASK]
|
| +
|
| + vmovdqa xmm4,XMMWORD[four]
|
| + vmovdqa xmm0,xmm15
|
| + vpaddd xmm1,xmm15,XMMWORD[one]
|
| + vpaddd xmm2,xmm15,XMMWORD[two]
|
| + vpaddd xmm3,xmm15,XMMWORD[three]
|
| +
|
| + shr r8,2
|
| + je NEAR $L$256_enc_msg_x4_check_remainder
|
| +
|
| + sub rsi,64
|
| + sub rdi,64
|
| +
|
| +$L$256_enc_msg_x4_loop1:
|
| + add rsi,64
|
| + add rdi,64
|
| +
|
| + vmovdqa xmm5,xmm0
|
| + vmovdqa xmm6,xmm1
|
| + vmovdqa xmm7,xmm2
|
| + vmovdqa xmm8,xmm3
|
| +
|
| + vpxor xmm5,xmm5,XMMWORD[rcx]
|
| + vpxor xmm6,xmm6,XMMWORD[rcx]
|
| + vpxor xmm7,xmm7,XMMWORD[rcx]
|
| + vpxor xmm8,xmm8,XMMWORD[rcx]
|
| +
|
| + vmovdqu xmm12,XMMWORD[16+rcx]
|
| + vaesenc xmm5,xmm5,xmm12
|
| + vaesenc xmm6,xmm6,xmm12
|
| + vaesenc xmm7,xmm7,xmm12
|
| + vaesenc xmm8,xmm8,xmm12
|
| +
|
| + vpaddd xmm0,xmm0,xmm4
|
| + vmovdqu xmm12,XMMWORD[32+rcx]
|
| + vaesenc xmm5,xmm5,xmm12
|
| + vaesenc xmm6,xmm6,xmm12
|
| + vaesenc xmm7,xmm7,xmm12
|
| + vaesenc xmm8,xmm8,xmm12
|
| +
|
| + vpaddd xmm1,xmm1,xmm4
|
| + vmovdqu xmm12,XMMWORD[48+rcx]
|
| + vaesenc xmm5,xmm5,xmm12
|
| + vaesenc xmm6,xmm6,xmm12
|
| + vaesenc xmm7,xmm7,xmm12
|
| + vaesenc xmm8,xmm8,xmm12
|
| +
|
| + vpaddd xmm2,xmm2,xmm4
|
| + vmovdqu xmm12,XMMWORD[64+rcx]
|
| + vaesenc xmm5,xmm5,xmm12
|
| + vaesenc xmm6,xmm6,xmm12
|
| + vaesenc xmm7,xmm7,xmm12
|
| + vaesenc xmm8,xmm8,xmm12
|
| +
|
| + vpaddd xmm3,xmm3,xmm4
|
| +
|
| + vmovdqu xmm12,XMMWORD[80+rcx]
|
| + vaesenc xmm5,xmm5,xmm12
|
| + vaesenc xmm6,xmm6,xmm12
|
| + vaesenc xmm7,xmm7,xmm12
|
| + vaesenc xmm8,xmm8,xmm12
|
| +
|
| + vmovdqu xmm12,XMMWORD[96+rcx]
|
| + vaesenc xmm5,xmm5,xmm12
|
| + vaesenc xmm6,xmm6,xmm12
|
| + vaesenc xmm7,xmm7,xmm12
|
| + vaesenc xmm8,xmm8,xmm12
|
| +
|
| + vmovdqu xmm12,XMMWORD[112+rcx]
|
| + vaesenc xmm5,xmm5,xmm12
|
| + vaesenc xmm6,xmm6,xmm12
|
| + vaesenc xmm7,xmm7,xmm12
|
| + vaesenc xmm8,xmm8,xmm12
|
| +
|
| + vmovdqu xmm12,XMMWORD[128+rcx]
|
| + vaesenc xmm5,xmm5,xmm12
|
| + vaesenc xmm6,xmm6,xmm12
|
| + vaesenc xmm7,xmm7,xmm12
|
| + vaesenc xmm8,xmm8,xmm12
|
| +
|
| + vmovdqu xmm12,XMMWORD[144+rcx]
|
| + vaesenc xmm5,xmm5,xmm12
|
| + vaesenc xmm6,xmm6,xmm12
|
| + vaesenc xmm7,xmm7,xmm12
|
| + vaesenc xmm8,xmm8,xmm12
|
| +
|
| + vmovdqu xmm12,XMMWORD[160+rcx]
|
| + vaesenc xmm5,xmm5,xmm12
|
| + vaesenc xmm6,xmm6,xmm12
|
| + vaesenc xmm7,xmm7,xmm12
|
| + vaesenc xmm8,xmm8,xmm12
|
| +
|
| + vmovdqu xmm12,XMMWORD[176+rcx]
|
| + vaesenc xmm5,xmm5,xmm12
|
| + vaesenc xmm6,xmm6,xmm12
|
| + vaesenc xmm7,xmm7,xmm12
|
| + vaesenc xmm8,xmm8,xmm12
|
| +
|
| + vmovdqu xmm12,XMMWORD[192+rcx]
|
| + vaesenc xmm5,xmm5,xmm12
|
| + vaesenc xmm6,xmm6,xmm12
|
| + vaesenc xmm7,xmm7,xmm12
|
| + vaesenc xmm8,xmm8,xmm12
|
| +
|
| + vmovdqu xmm12,XMMWORD[208+rcx]
|
| + vaesenc xmm5,xmm5,xmm12
|
| + vaesenc xmm6,xmm6,xmm12
|
| + vaesenc xmm7,xmm7,xmm12
|
| + vaesenc xmm8,xmm8,xmm12
|
| +
|
| + vmovdqu xmm12,XMMWORD[224+rcx]
|
| + vaesenclast xmm5,xmm5,xmm12
|
| + vaesenclast xmm6,xmm6,xmm12
|
| + vaesenclast xmm7,xmm7,xmm12
|
| + vaesenclast xmm8,xmm8,xmm12
|
| +
|
| +
|
| +
|
| + vpxor xmm5,xmm5,XMMWORD[rdi]
|
| + vpxor xmm6,xmm6,XMMWORD[16+rdi]
|
| + vpxor xmm7,xmm7,XMMWORD[32+rdi]
|
| + vpxor xmm8,xmm8,XMMWORD[48+rdi]
|
| +
|
| + sub r8,1
|
| +
|
| + vmovdqu XMMWORD[rsi],xmm5
|
| + vmovdqu XMMWORD[16+rsi],xmm6
|
| + vmovdqu XMMWORD[32+rsi],xmm7
|
| + vmovdqu XMMWORD[48+rsi],xmm8
|
| +
|
| + jne NEAR $L$256_enc_msg_x4_loop1
|
| +
|
| + add rsi,64
|
| + add rdi,64
|
| +
|
| +$L$256_enc_msg_x4_check_remainder:
|
| + cmp r10,0
|
| + je NEAR $L$256_enc_msg_x4_out
|
| +
|
| +$L$256_enc_msg_x4_loop2:
|
| +
|
| +
|
| +
|
| + vmovdqa xmm5,xmm0
|
| + vpaddd xmm0,xmm0,XMMWORD[one]
|
| + vpxor xmm5,xmm5,XMMWORD[rcx]
|
| + vaesenc xmm5,xmm5,XMMWORD[16+rcx]
|
| + vaesenc xmm5,xmm5,XMMWORD[32+rcx]
|
| + vaesenc xmm5,xmm5,XMMWORD[48+rcx]
|
| + vaesenc xmm5,xmm5,XMMWORD[64+rcx]
|
| + vaesenc xmm5,xmm5,XMMWORD[80+rcx]
|
| + vaesenc xmm5,xmm5,XMMWORD[96+rcx]
|
| + vaesenc xmm5,xmm5,XMMWORD[112+rcx]
|
| + vaesenc xmm5,xmm5,XMMWORD[128+rcx]
|
| + vaesenc xmm5,xmm5,XMMWORD[144+rcx]
|
| + vaesenc xmm5,xmm5,XMMWORD[160+rcx]
|
| + vaesenc xmm5,xmm5,XMMWORD[176+rcx]
|
| + vaesenc xmm5,xmm5,XMMWORD[192+rcx]
|
| + vaesenc xmm5,xmm5,XMMWORD[208+rcx]
|
| + vaesenclast xmm5,xmm5,XMMWORD[224+rcx]
|
| +
|
| +
|
| + vpxor xmm5,xmm5,XMMWORD[rdi]
|
| +
|
| + vmovdqu XMMWORD[rsi],xmm5
|
| +
|
| + add rdi,16
|
| + add rsi,16
|
| +
|
| + sub r10,1
|
| + jne NEAR $L$256_enc_msg_x4_loop2
|
| +
|
| +$L$256_enc_msg_x4_out:
|
| + mov rdi,QWORD[8+rsp] ;WIN64 epilogue
|
| + mov rsi,QWORD[16+rsp]
|
| + DB 0F3h,0C3h ;repret
|
| +
|
| +$L$SEH_end_aes256gcmsiv_enc_msg_x4:
|
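| +; aes256gcmsiv_enc_msg_x8: AES-256-CTR encryption, eight blocks per iteration.
|
| +; After the Win64-to-SysV shim below: rdi = plaintext, rsi = ciphertext out,
|
| +; rdx = initial counter block (its top bit is forced via OR_MASK),
|
| +; rcx = AES-256 key schedule, r8 = message length in bytes.
|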
| +global aes256gcmsiv_enc_msg_x8
|
| +
|
| +ALIGN 16
|
| +aes256gcmsiv_enc_msg_x8:
|
| + mov QWORD[8+rsp],rdi ;WIN64 prologue
|
| + mov QWORD[16+rsp],rsi
|
| + mov rax,rsp
|
| +$L$SEH_begin_aes256gcmsiv_enc_msg_x8:
|
| + mov rdi,rcx
|
| + mov rsi,rdx
|
| + mov rdx,r8
|
| + mov rcx,r9
|
| + mov r8,QWORD[40+rsp]
|
| +
|
| +
|
| +
|
| + test r8,r8
|
| + jnz NEAR $L$256_enc_msg_x8_start
|
| + mov rdi,QWORD[8+rsp] ;WIN64 epilogue
|
| + mov rsi,QWORD[16+rsp]
|
| + DB 0F3h,0C3h ;repret
|
| +
|
| +$L$256_enc_msg_x8_start:
|
| +
|
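| +; carve a 64-byte-aligned scratch slot below rsp; it holds the eighth counter block (see [r11] below)
|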
| + mov r11,rsp
|
| + sub r11,16
|
| + and r11,-64
|
| +
|
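| +; block-count bookkeeping: r8 = ceil(len/16) whole blocks, then r10 = r8 mod 8
|
| +; (tail blocks) and r8 = r8/8 (eight-wide passes)
|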
| + mov r10,r8
|
| + shr r8,4
|
| + shl r10,60
|
| + jz NEAR $L$256_enc_msg_x8_start2
|
| + add r8,1
|
| +
|
| +$L$256_enc_msg_x8_start2:
|
| + mov r10,r8
|
| + shl r10,61
|
| + shr r10,61
|
| +
|
| +
|
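| +; load the counter, set its top bit, and precompute IV+1..IV+7; IV+7 is kept in the stack slot at [r11]
|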
| + vmovdqa xmm1,XMMWORD[rdx]
|
| + vpor xmm1,xmm1,XMMWORD[OR_MASK]
|
| +
|
| +
|
| + vpaddd xmm0,xmm1,XMMWORD[seven]
|
| + vmovdqa XMMWORD[r11],xmm0
|
| + vpaddd xmm9,xmm1,XMMWORD[one]
|
| + vpaddd xmm10,xmm1,XMMWORD[two]
|
| + vpaddd xmm11,xmm1,XMMWORD[three]
|
| + vpaddd xmm12,xmm1,XMMWORD[four]
|
| + vpaddd xmm13,xmm1,XMMWORD[five]
|
| + vpaddd xmm14,xmm1,XMMWORD[six]
|
| + vmovdqa xmm0,xmm1
|
| +
|
| + shr r8,3
|
| + jz NEAR $L$256_enc_msg_x8_check_remainder
|
| +
|
| + sub rsi,128
|
| + sub rdi,128
|
| +
|
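| +; main loop: eight counter blocks per pass; the counters are bumped by eight
|
| +; between the early rounds to hide vaesenc latency
|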
| +$L$256_enc_msg_x8_loop1:
|
| + add rsi,128
|
| + add rdi,128
|
| +
|
| + vmovdqa xmm1,xmm0
|
| + vmovdqa xmm2,xmm9
|
| + vmovdqa xmm3,xmm10
|
| + vmovdqa xmm4,xmm11
|
| + vmovdqa xmm5,xmm12
|
| + vmovdqa xmm6,xmm13
|
| + vmovdqa xmm7,xmm14
|
| +
|
| + vmovdqa xmm8,XMMWORD[r11]
|
| +
|
| + vpxor xmm1,xmm1,XMMWORD[rcx]
|
| + vpxor xmm2,xmm2,XMMWORD[rcx]
|
| + vpxor xmm3,xmm3,XMMWORD[rcx]
|
| + vpxor xmm4,xmm4,XMMWORD[rcx]
|
| + vpxor xmm5,xmm5,XMMWORD[rcx]
|
| + vpxor xmm6,xmm6,XMMWORD[rcx]
|
| + vpxor xmm7,xmm7,XMMWORD[rcx]
|
| + vpxor xmm8,xmm8,XMMWORD[rcx]
|
| +
|
| + vmovdqu xmm15,XMMWORD[16+rcx]
|
| + vaesenc xmm1,xmm1,xmm15
|
| + vaesenc xmm2,xmm2,xmm15
|
| + vaesenc xmm3,xmm3,xmm15
|
| + vaesenc xmm4,xmm4,xmm15
|
| + vaesenc xmm5,xmm5,xmm15
|
| + vaesenc xmm6,xmm6,xmm15
|
| + vaesenc xmm7,xmm7,xmm15
|
| + vaesenc xmm8,xmm8,xmm15
|
| +
|
| + vmovdqa xmm14,XMMWORD[r11]
|
| + vpaddd xmm14,xmm14,XMMWORD[eight]
|
| + vmovdqa XMMWORD[r11],xmm14
|
| + vmovdqu xmm15,XMMWORD[32+rcx]
|
| + vaesenc xmm1,xmm1,xmm15
|
| + vaesenc xmm2,xmm2,xmm15
|
| + vaesenc xmm3,xmm3,xmm15
|
| + vaesenc xmm4,xmm4,xmm15
|
| + vaesenc xmm5,xmm5,xmm15
|
| + vaesenc xmm6,xmm6,xmm15
|
| + vaesenc xmm7,xmm7,xmm15
|
| + vaesenc xmm8,xmm8,xmm15
|
| +
|
| + vpsubd xmm14,xmm14,XMMWORD[one]
|
| + vmovdqu xmm15,XMMWORD[48+rcx]
|
| + vaesenc xmm1,xmm1,xmm15
|
| + vaesenc xmm2,xmm2,xmm15
|
| + vaesenc xmm3,xmm3,xmm15
|
| + vaesenc xmm4,xmm4,xmm15
|
| + vaesenc xmm5,xmm5,xmm15
|
| + vaesenc xmm6,xmm6,xmm15
|
| + vaesenc xmm7,xmm7,xmm15
|
| + vaesenc xmm8,xmm8,xmm15
|
| +
|
| + vpaddd xmm0,xmm0,XMMWORD[eight]
|
| + vmovdqu xmm15,XMMWORD[64+rcx]
|
| + vaesenc xmm1,xmm1,xmm15
|
| + vaesenc xmm2,xmm2,xmm15
|
| + vaesenc xmm3,xmm3,xmm15
|
| + vaesenc xmm4,xmm4,xmm15
|
| + vaesenc xmm5,xmm5,xmm15
|
| + vaesenc xmm6,xmm6,xmm15
|
| + vaesenc xmm7,xmm7,xmm15
|
| + vaesenc xmm8,xmm8,xmm15
|
| +
|
| + vpaddd xmm9,xmm9,XMMWORD[eight]
|
| + vmovdqu xmm15,XMMWORD[80+rcx]
|
| + vaesenc xmm1,xmm1,xmm15
|
| + vaesenc xmm2,xmm2,xmm15
|
| + vaesenc xmm3,xmm3,xmm15
|
| + vaesenc xmm4,xmm4,xmm15
|
| + vaesenc xmm5,xmm5,xmm15
|
| + vaesenc xmm6,xmm6,xmm15
|
| + vaesenc xmm7,xmm7,xmm15
|
| + vaesenc xmm8,xmm8,xmm15
|
| +
|
| + vpaddd xmm10,xmm10,XMMWORD[eight]
|
| + vmovdqu xmm15,XMMWORD[96+rcx]
|
| + vaesenc xmm1,xmm1,xmm15
|
| + vaesenc xmm2,xmm2,xmm15
|
| + vaesenc xmm3,xmm3,xmm15
|
| + vaesenc xmm4,xmm4,xmm15
|
| + vaesenc xmm5,xmm5,xmm15
|
| + vaesenc xmm6,xmm6,xmm15
|
| + vaesenc xmm7,xmm7,xmm15
|
| + vaesenc xmm8,xmm8,xmm15
|
| +
|
| + vpaddd xmm11,xmm11,XMMWORD[eight]
|
| + vmovdqu xmm15,XMMWORD[112+rcx]
|
| + vaesenc xmm1,xmm1,xmm15
|
| + vaesenc xmm2,xmm2,xmm15
|
| + vaesenc xmm3,xmm3,xmm15
|
| + vaesenc xmm4,xmm4,xmm15
|
| + vaesenc xmm5,xmm5,xmm15
|
| + vaesenc xmm6,xmm6,xmm15
|
| + vaesenc xmm7,xmm7,xmm15
|
| + vaesenc xmm8,xmm8,xmm15
|
| +
|
| + vpaddd xmm12,xmm12,XMMWORD[eight]
|
| + vmovdqu xmm15,XMMWORD[128+rcx]
|
| + vaesenc xmm1,xmm1,xmm15
|
| + vaesenc xmm2,xmm2,xmm15
|
| + vaesenc xmm3,xmm3,xmm15
|
| + vaesenc xmm4,xmm4,xmm15
|
| + vaesenc xmm5,xmm5,xmm15
|
| + vaesenc xmm6,xmm6,xmm15
|
| + vaesenc xmm7,xmm7,xmm15
|
| + vaesenc xmm8,xmm8,xmm15
|
| +
|
| + vpaddd xmm13,xmm13,XMMWORD[eight]
|
| + vmovdqu xmm15,XMMWORD[144+rcx]
|
| + vaesenc xmm1,xmm1,xmm15
|
| + vaesenc xmm2,xmm2,xmm15
|
| + vaesenc xmm3,xmm3,xmm15
|
| + vaesenc xmm4,xmm4,xmm15
|
| + vaesenc xmm5,xmm5,xmm15
|
| + vaesenc xmm6,xmm6,xmm15
|
| + vaesenc xmm7,xmm7,xmm15
|
| + vaesenc xmm8,xmm8,xmm15
|
| +
|
| + vmovdqu xmm15,XMMWORD[160+rcx]
|
| + vaesenc xmm1,xmm1,xmm15
|
| + vaesenc xmm2,xmm2,xmm15
|
| + vaesenc xmm3,xmm3,xmm15
|
| + vaesenc xmm4,xmm4,xmm15
|
| + vaesenc xmm5,xmm5,xmm15
|
| + vaesenc xmm6,xmm6,xmm15
|
| + vaesenc xmm7,xmm7,xmm15
|
| + vaesenc xmm8,xmm8,xmm15
|
| +
|
| + vmovdqu xmm15,XMMWORD[176+rcx]
|
| + vaesenc xmm1,xmm1,xmm15
|
| + vaesenc xmm2,xmm2,xmm15
|
| + vaesenc xmm3,xmm3,xmm15
|
| + vaesenc xmm4,xmm4,xmm15
|
| + vaesenc xmm5,xmm5,xmm15
|
| + vaesenc xmm6,xmm6,xmm15
|
| + vaesenc xmm7,xmm7,xmm15
|
| + vaesenc xmm8,xmm8,xmm15
|
| +
|
| + vmovdqu xmm15,XMMWORD[192+rcx]
|
| + vaesenc xmm1,xmm1,xmm15
|
| + vaesenc xmm2,xmm2,xmm15
|
| + vaesenc xmm3,xmm3,xmm15
|
| + vaesenc xmm4,xmm4,xmm15
|
| + vaesenc xmm5,xmm5,xmm15
|
| + vaesenc xmm6,xmm6,xmm15
|
| + vaesenc xmm7,xmm7,xmm15
|
| + vaesenc xmm8,xmm8,xmm15
|
| +
|
| + vmovdqu xmm15,XMMWORD[208+rcx]
|
| + vaesenc xmm1,xmm1,xmm15
|
| + vaesenc xmm2,xmm2,xmm15
|
| + vaesenc xmm3,xmm3,xmm15
|
| + vaesenc xmm4,xmm4,xmm15
|
| + vaesenc xmm5,xmm5,xmm15
|
| + vaesenc xmm6,xmm6,xmm15
|
| + vaesenc xmm7,xmm7,xmm15
|
| + vaesenc xmm8,xmm8,xmm15
|
| +
|
| + vmovdqu xmm15,XMMWORD[224+rcx]
|
| + vaesenclast xmm1,xmm1,xmm15
|
| + vaesenclast xmm2,xmm2,xmm15
|
| + vaesenclast xmm3,xmm3,xmm15
|
| + vaesenclast xmm4,xmm4,xmm15
|
| + vaesenclast xmm5,xmm5,xmm15
|
| + vaesenclast xmm6,xmm6,xmm15
|
| + vaesenclast xmm7,xmm7,xmm15
|
| + vaesenclast xmm8,xmm8,xmm15
|
| +
|
| +
|
| +
|
| + vpxor xmm1,xmm1,XMMWORD[rdi]
|
| + vpxor xmm2,xmm2,XMMWORD[16+rdi]
|
| + vpxor xmm3,xmm3,XMMWORD[32+rdi]
|
| + vpxor xmm4,xmm4,XMMWORD[48+rdi]
|
| + vpxor xmm5,xmm5,XMMWORD[64+rdi]
|
| + vpxor xmm6,xmm6,XMMWORD[80+rdi]
|
| + vpxor xmm7,xmm7,XMMWORD[96+rdi]
|
| + vpxor xmm8,xmm8,XMMWORD[112+rdi]
|
| +
|
| + sub r8,1
|
| +
|
| + vmovdqu XMMWORD[rsi],xmm1
|
| + vmovdqu XMMWORD[16+rsi],xmm2
|
| + vmovdqu XMMWORD[32+rsi],xmm3
|
| + vmovdqu XMMWORD[48+rsi],xmm4
|
| + vmovdqu XMMWORD[64+rsi],xmm5
|
| + vmovdqu XMMWORD[80+rsi],xmm6
|
| + vmovdqu XMMWORD[96+rsi],xmm7
|
| + vmovdqu XMMWORD[112+rsi],xmm8
|
| +
|
| + jne NEAR $L$256_enc_msg_x8_loop1
|
| +
|
| + add rsi,128
|
| + add rdi,128
|
| +
|
| +$L$256_enc_msg_x8_check_remainder:
|
| + cmp r10,0
|
| + je NEAR $L$256_enc_msg_x8_out
|
| +
|
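| +; tail: one block at a time for the final r10 blocks
|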
| +$L$256_enc_msg_x8_loop2:
|
| +
|
| +
|
| + vmovdqa xmm1,xmm0
|
| + vpaddd xmm0,xmm0,XMMWORD[one]
|
| +
|
| + vpxor xmm1,xmm1,XMMWORD[rcx]
|
| + vaesenc xmm1,xmm1,XMMWORD[16+rcx]
|
| + vaesenc xmm1,xmm1,XMMWORD[32+rcx]
|
| + vaesenc xmm1,xmm1,XMMWORD[48+rcx]
|
| + vaesenc xmm1,xmm1,XMMWORD[64+rcx]
|
| + vaesenc xmm1,xmm1,XMMWORD[80+rcx]
|
| + vaesenc xmm1,xmm1,XMMWORD[96+rcx]
|
| + vaesenc xmm1,xmm1,XMMWORD[112+rcx]
|
| + vaesenc xmm1,xmm1,XMMWORD[128+rcx]
|
| + vaesenc xmm1,xmm1,XMMWORD[144+rcx]
|
| + vaesenc xmm1,xmm1,XMMWORD[160+rcx]
|
| + vaesenc xmm1,xmm1,XMMWORD[176+rcx]
|
| + vaesenc xmm1,xmm1,XMMWORD[192+rcx]
|
| + vaesenc xmm1,xmm1,XMMWORD[208+rcx]
|
| + vaesenclast xmm1,xmm1,XMMWORD[224+rcx]
|
| +
|
| +
|
| + vpxor xmm1,xmm1,XMMWORD[rdi]
|
| +
|
| + vmovdqu XMMWORD[rsi],xmm1
|
| +
|
| + add rdi,16
|
| + add rsi,16
|
| + sub r10,1
|
| + jnz NEAR $L$256_enc_msg_x8_loop2
|
| +
|
| +$L$256_enc_msg_x8_out:
|
| + mov rdi,QWORD[8+rsp] ;WIN64 epilogue
|
| + mov rsi,QWORD[16+rsp]
|
| + DB 0F3h,0C3h ;repret
|
| +
|
| +
|
| +$L$SEH_end_aes256gcmsiv_enc_msg_x8:
|
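| +; aes256gcmsiv_dec: CTR decryption fused with recomputing POLYVAL over the
|
| +; decrypted plaintext. After the shim: rdi = ciphertext, rsi = plaintext out,
|
| +; rdx = POLYVAL accumulator (read here, written back at $L$256_dec_out) with
|
| +; scratch space behind it, rcx = powers-of-H table, r8 = AES-256 key
|
| +; schedule, r9 = length in bytes (only whole 16-byte blocks are processed).
|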
| +global aes256gcmsiv_dec
|
| +
|
| +ALIGN 16
|
| +aes256gcmsiv_dec:
|
| + mov QWORD[8+rsp],rdi ;WIN64 prologue
|
| + mov QWORD[16+rsp],rsi
|
| + mov rax,rsp
|
| +$L$SEH_begin_aes256gcmsiv_dec:
|
| + mov rdi,rcx
|
| + mov rsi,rdx
|
| + mov rdx,r8
|
| + mov rcx,r9
|
| + mov r8,QWORD[40+rsp]
|
| + mov r9,QWORD[48+rsp]
|
| +
|
| +
|
| +
|
| + test r9,~15
|
| + jnz NEAR $L$256_dec_start
|
| + mov rdi,QWORD[8+rsp] ;WIN64 epilogue
|
| + mov rsi,QWORD[16+rsp]
|
| + DB 0F3h,0C3h ;repret
|
| +
|
| +$L$256_dec_start:
|
| + vzeroupper
|
| + vmovdqa xmm0,XMMWORD[rdx]
|
| + mov rax,rdx
|
| +
|
| + lea rax,[32+rax]
|
| + lea rcx,[32+rcx]
|
| +
|
| +
|
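| +; the initial counter is the 16 bytes at in+len (presumably the tag, stored
|
| +; after the ciphertext), with the top bit forced via OR_MASK
|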
| + vmovdqu xmm15,XMMWORD[r9*1+rdi]
|
| + vpor xmm15,xmm15,XMMWORD[OR_MASK]
|
| + and r9,~15
|
| +
|
| +
|
| + cmp r9,96
|
| + jb NEAR $L$256_dec_loop2
|
| +
|
| +
|
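| +; prologue: encrypt the first six counter blocks up front, so each later pass
|
| +; can overlap POLYVAL of one 96-byte chunk with the AES rounds of the next
|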
| + sub r9,96
|
| + vmovdqa xmm7,xmm15
|
| + vpaddd xmm8,xmm7,XMMWORD[one]
|
| + vpaddd xmm9,xmm7,XMMWORD[two]
|
| + vpaddd xmm10,xmm9,XMMWORD[one]
|
| + vpaddd xmm11,xmm9,XMMWORD[two]
|
| + vpaddd xmm12,xmm11,XMMWORD[one]
|
| + vpaddd xmm15,xmm11,XMMWORD[two]
|
| +
|
| + vpxor xmm7,xmm7,XMMWORD[r8]
|
| + vpxor xmm8,xmm8,XMMWORD[r8]
|
| + vpxor xmm9,xmm9,XMMWORD[r8]
|
| + vpxor xmm10,xmm10,XMMWORD[r8]
|
| + vpxor xmm11,xmm11,XMMWORD[r8]
|
| + vpxor xmm12,xmm12,XMMWORD[r8]
|
| +
|
| + vmovdqu xmm4,XMMWORD[16+r8]
|
| + vaesenc xmm7,xmm7,xmm4
|
| + vaesenc xmm8,xmm8,xmm4
|
| + vaesenc xmm9,xmm9,xmm4
|
| + vaesenc xmm10,xmm10,xmm4
|
| + vaesenc xmm11,xmm11,xmm4
|
| + vaesenc xmm12,xmm12,xmm4
|
| +
|
| + vmovdqu xmm4,XMMWORD[32+r8]
|
| + vaesenc xmm7,xmm7,xmm4
|
| + vaesenc xmm8,xmm8,xmm4
|
| + vaesenc xmm9,xmm9,xmm4
|
| + vaesenc xmm10,xmm10,xmm4
|
| + vaesenc xmm11,xmm11,xmm4
|
| + vaesenc xmm12,xmm12,xmm4
|
| +
|
| + vmovdqu xmm4,XMMWORD[48+r8]
|
| + vaesenc xmm7,xmm7,xmm4
|
| + vaesenc xmm8,xmm8,xmm4
|
| + vaesenc xmm9,xmm9,xmm4
|
| + vaesenc xmm10,xmm10,xmm4
|
| + vaesenc xmm11,xmm11,xmm4
|
| + vaesenc xmm12,xmm12,xmm4
|
| +
|
| + vmovdqu xmm4,XMMWORD[64+r8]
|
| + vaesenc xmm7,xmm7,xmm4
|
| + vaesenc xmm8,xmm8,xmm4
|
| + vaesenc xmm9,xmm9,xmm4
|
| + vaesenc xmm10,xmm10,xmm4
|
| + vaesenc xmm11,xmm11,xmm4
|
| + vaesenc xmm12,xmm12,xmm4
|
| +
|
| + vmovdqu xmm4,XMMWORD[80+r8]
|
| + vaesenc xmm7,xmm7,xmm4
|
| + vaesenc xmm8,xmm8,xmm4
|
| + vaesenc xmm9,xmm9,xmm4
|
| + vaesenc xmm10,xmm10,xmm4
|
| + vaesenc xmm11,xmm11,xmm4
|
| + vaesenc xmm12,xmm12,xmm4
|
| +
|
| + vmovdqu xmm4,XMMWORD[96+r8]
|
| + vaesenc xmm7,xmm7,xmm4
|
| + vaesenc xmm8,xmm8,xmm4
|
| + vaesenc xmm9,xmm9,xmm4
|
| + vaesenc xmm10,xmm10,xmm4
|
| + vaesenc xmm11,xmm11,xmm4
|
| + vaesenc xmm12,xmm12,xmm4
|
| +
|
| + vmovdqu xmm4,XMMWORD[112+r8]
|
| + vaesenc xmm7,xmm7,xmm4
|
| + vaesenc xmm8,xmm8,xmm4
|
| + vaesenc xmm9,xmm9,xmm4
|
| + vaesenc xmm10,xmm10,xmm4
|
| + vaesenc xmm11,xmm11,xmm4
|
| + vaesenc xmm12,xmm12,xmm4
|
| +
|
| + vmovdqu xmm4,XMMWORD[128+r8]
|
| + vaesenc xmm7,xmm7,xmm4
|
| + vaesenc xmm8,xmm8,xmm4
|
| + vaesenc xmm9,xmm9,xmm4
|
| + vaesenc xmm10,xmm10,xmm4
|
| + vaesenc xmm11,xmm11,xmm4
|
| + vaesenc xmm12,xmm12,xmm4
|
| +
|
| + vmovdqu xmm4,XMMWORD[144+r8]
|
| + vaesenc xmm7,xmm7,xmm4
|
| + vaesenc xmm8,xmm8,xmm4
|
| + vaesenc xmm9,xmm9,xmm4
|
| + vaesenc xmm10,xmm10,xmm4
|
| + vaesenc xmm11,xmm11,xmm4
|
| + vaesenc xmm12,xmm12,xmm4
|
| +
|
| + vmovdqu xmm4,XMMWORD[160+r8]
|
| + vaesenc xmm7,xmm7,xmm4
|
| + vaesenc xmm8,xmm8,xmm4
|
| + vaesenc xmm9,xmm9,xmm4
|
| + vaesenc xmm10,xmm10,xmm4
|
| + vaesenc xmm11,xmm11,xmm4
|
| + vaesenc xmm12,xmm12,xmm4
|
| +
|
| + vmovdqu xmm4,XMMWORD[176+r8]
|
| + vaesenc xmm7,xmm7,xmm4
|
| + vaesenc xmm8,xmm8,xmm4
|
| + vaesenc xmm9,xmm9,xmm4
|
| + vaesenc xmm10,xmm10,xmm4
|
| + vaesenc xmm11,xmm11,xmm4
|
| + vaesenc xmm12,xmm12,xmm4
|
| +
|
| + vmovdqu xmm4,XMMWORD[192+r8]
|
| + vaesenc xmm7,xmm7,xmm4
|
| + vaesenc xmm8,xmm8,xmm4
|
| + vaesenc xmm9,xmm9,xmm4
|
| + vaesenc xmm10,xmm10,xmm4
|
| + vaesenc xmm11,xmm11,xmm4
|
| + vaesenc xmm12,xmm12,xmm4
|
| +
|
| + vmovdqu xmm4,XMMWORD[208+r8]
|
| + vaesenc xmm7,xmm7,xmm4
|
| + vaesenc xmm8,xmm8,xmm4
|
| + vaesenc xmm9,xmm9,xmm4
|
| + vaesenc xmm10,xmm10,xmm4
|
| + vaesenc xmm11,xmm11,xmm4
|
| + vaesenc xmm12,xmm12,xmm4
|
| +
|
| + vmovdqu xmm4,XMMWORD[224+r8]
|
| + vaesenclast xmm7,xmm7,xmm4
|
| + vaesenclast xmm8,xmm8,xmm4
|
| + vaesenclast xmm9,xmm9,xmm4
|
| + vaesenclast xmm10,xmm10,xmm4
|
| + vaesenclast xmm11,xmm11,xmm4
|
| + vaesenclast xmm12,xmm12,xmm4
|
| +
|
| +
|
| + vpxor xmm7,xmm7,XMMWORD[rdi]
|
| + vpxor xmm8,xmm8,XMMWORD[16+rdi]
|
| + vpxor xmm9,xmm9,XMMWORD[32+rdi]
|
| + vpxor xmm10,xmm10,XMMWORD[48+rdi]
|
| + vpxor xmm11,xmm11,XMMWORD[64+rdi]
|
| + vpxor xmm12,xmm12,XMMWORD[80+rdi]
|
| +
|
| + vmovdqu XMMWORD[rsi],xmm7
|
| + vmovdqu XMMWORD[16+rsi],xmm8
|
| + vmovdqu XMMWORD[32+rsi],xmm9
|
| + vmovdqu XMMWORD[48+rsi],xmm10
|
| + vmovdqu XMMWORD[64+rsi],xmm11
|
| + vmovdqu XMMWORD[80+rsi],xmm12
|
| +
|
| + add rdi,96
|
| + add rsi,96
|
| + jmp NEAR $L$256_dec_loop1
|
| +
|
| +
|
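| +; steady state: fold the previous six plaintext blocks into POLYVAL (powers
|
| +; H^1..H^6 from the table at rcx) while encrypting the next six counter blocks
|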
| +ALIGN 64
|
| +$L$256_dec_loop1:
|
| + cmp r9,96
|
| + jb NEAR $L$256_dec_finish_96
|
| + sub r9,96
|
| +
|
| + vmovdqa xmm6,xmm12
|
| + vmovdqa XMMWORD[(16-32)+rax],xmm11
|
| + vmovdqa XMMWORD[(32-32)+rax],xmm10
|
| + vmovdqa XMMWORD[(48-32)+rax],xmm9
|
| + vmovdqa XMMWORD[(64-32)+rax],xmm8
|
| + vmovdqa XMMWORD[(80-32)+rax],xmm7
|
| +
|
| + vmovdqa xmm7,xmm15
|
| + vpaddd xmm8,xmm7,XMMWORD[one]
|
| + vpaddd xmm9,xmm7,XMMWORD[two]
|
| + vpaddd xmm10,xmm9,XMMWORD[one]
|
| + vpaddd xmm11,xmm9,XMMWORD[two]
|
| + vpaddd xmm12,xmm11,XMMWORD[one]
|
| + vpaddd xmm15,xmm11,XMMWORD[two]
|
| +
|
| + vmovdqa xmm4,XMMWORD[r8]
|
| + vpxor xmm7,xmm7,xmm4
|
| + vpxor xmm8,xmm8,xmm4
|
| + vpxor xmm9,xmm9,xmm4
|
| + vpxor xmm10,xmm10,xmm4
|
| + vpxor xmm11,xmm11,xmm4
|
| + vpxor xmm12,xmm12,xmm4
|
| +
|
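| +; POLYVAL products: xmm2 collects the high halves (selector 0x11), xmm3 the
|
| +; low halves (0x00), xmm1 the crossed middle terms (0x01/0x10)
|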
| + vmovdqu xmm4,XMMWORD[((0-32))+rcx]
|
| + vpclmulqdq xmm2,xmm6,xmm4,0x11
|
| + vpclmulqdq xmm3,xmm6,xmm4,0x00
|
| + vpclmulqdq xmm1,xmm6,xmm4,0x01
|
| + vpclmulqdq xmm4,xmm6,xmm4,0x10
|
| + vpxor xmm1,xmm1,xmm4
|
| +
|
| + vmovdqu xmm4,XMMWORD[16+r8]
|
| + vaesenc xmm7,xmm7,xmm4
|
| + vaesenc xmm8,xmm8,xmm4
|
| + vaesenc xmm9,xmm9,xmm4
|
| + vaesenc xmm10,xmm10,xmm4
|
| + vaesenc xmm11,xmm11,xmm4
|
| + vaesenc xmm12,xmm12,xmm4
|
| +
|
| + vmovdqu xmm6,XMMWORD[((-16))+rax]
|
| + vmovdqu xmm13,XMMWORD[((-16))+rcx]
|
| +
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x10
|
| + vpxor xmm1,xmm1,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x11
|
| + vpxor xmm2,xmm2,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x00
|
| + vpxor xmm3,xmm3,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x01
|
| + vpxor xmm1,xmm1,xmm4
|
| +
|
| +
|
| + vmovdqu xmm4,XMMWORD[32+r8]
|
| + vaesenc xmm7,xmm7,xmm4
|
| + vaesenc xmm8,xmm8,xmm4
|
| + vaesenc xmm9,xmm9,xmm4
|
| + vaesenc xmm10,xmm10,xmm4
|
| + vaesenc xmm11,xmm11,xmm4
|
| + vaesenc xmm12,xmm12,xmm4
|
| +
|
| + vmovdqu xmm6,XMMWORD[rax]
|
| + vmovdqu xmm13,XMMWORD[rcx]
|
| +
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x10
|
| + vpxor xmm1,xmm1,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x11
|
| + vpxor xmm2,xmm2,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x00
|
| + vpxor xmm3,xmm3,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x01
|
| + vpxor xmm1,xmm1,xmm4
|
| +
|
| +
|
| + vmovdqu xmm4,XMMWORD[48+r8]
|
| + vaesenc xmm7,xmm7,xmm4
|
| + vaesenc xmm8,xmm8,xmm4
|
| + vaesenc xmm9,xmm9,xmm4
|
| + vaesenc xmm10,xmm10,xmm4
|
| + vaesenc xmm11,xmm11,xmm4
|
| + vaesenc xmm12,xmm12,xmm4
|
| +
|
| + vmovdqu xmm6,XMMWORD[16+rax]
|
| + vmovdqu xmm13,XMMWORD[16+rcx]
|
| +
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x10
|
| + vpxor xmm1,xmm1,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x11
|
| + vpxor xmm2,xmm2,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x00
|
| + vpxor xmm3,xmm3,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x01
|
| + vpxor xmm1,xmm1,xmm4
|
| +
|
| +
|
| + vmovdqu xmm4,XMMWORD[64+r8]
|
| + vaesenc xmm7,xmm7,xmm4
|
| + vaesenc xmm8,xmm8,xmm4
|
| + vaesenc xmm9,xmm9,xmm4
|
| + vaesenc xmm10,xmm10,xmm4
|
| + vaesenc xmm11,xmm11,xmm4
|
| + vaesenc xmm12,xmm12,xmm4
|
| +
|
| + vmovdqu xmm6,XMMWORD[32+rax]
|
| + vmovdqu xmm13,XMMWORD[32+rcx]
|
| +
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x10
|
| + vpxor xmm1,xmm1,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x11
|
| + vpxor xmm2,xmm2,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x00
|
| + vpxor xmm3,xmm3,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x01
|
| + vpxor xmm1,xmm1,xmm4
|
| +
|
| +
|
| + vmovdqu xmm4,XMMWORD[80+r8]
|
| + vaesenc xmm7,xmm7,xmm4
|
| + vaesenc xmm8,xmm8,xmm4
|
| + vaesenc xmm9,xmm9,xmm4
|
| + vaesenc xmm10,xmm10,xmm4
|
| + vaesenc xmm11,xmm11,xmm4
|
| + vaesenc xmm12,xmm12,xmm4
|
| +
|
| + vmovdqu xmm4,XMMWORD[96+r8]
|
| + vaesenc xmm7,xmm7,xmm4
|
| + vaesenc xmm8,xmm8,xmm4
|
| + vaesenc xmm9,xmm9,xmm4
|
| + vaesenc xmm10,xmm10,xmm4
|
| + vaesenc xmm11,xmm11,xmm4
|
| + vaesenc xmm12,xmm12,xmm4
|
| +
|
| + vmovdqu xmm4,XMMWORD[112+r8]
|
| + vaesenc xmm7,xmm7,xmm4
|
| + vaesenc xmm8,xmm8,xmm4
|
| + vaesenc xmm9,xmm9,xmm4
|
| + vaesenc xmm10,xmm10,xmm4
|
| + vaesenc xmm11,xmm11,xmm4
|
| + vaesenc xmm12,xmm12,xmm4
|
| +
|
| +
|
| + vmovdqa xmm6,XMMWORD[((80-32))+rax]
|
| + vpxor xmm6,xmm6,xmm0
|
| + vmovdqu xmm5,XMMWORD[((80-32))+rcx]
|
| +
|
| + vpclmulqdq xmm4,xmm6,xmm5,0x01
|
| + vpxor xmm1,xmm1,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm5,0x11
|
| + vpxor xmm2,xmm2,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm5,0x00
|
| + vpxor xmm3,xmm3,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm5,0x10
|
| + vpxor xmm1,xmm1,xmm4
|
| +
|
| + vmovdqu xmm4,XMMWORD[128+r8]
|
| + vaesenc xmm7,xmm7,xmm4
|
| + vaesenc xmm8,xmm8,xmm4
|
| + vaesenc xmm9,xmm9,xmm4
|
| + vaesenc xmm10,xmm10,xmm4
|
| + vaesenc xmm11,xmm11,xmm4
|
| + vaesenc xmm12,xmm12,xmm4
|
| +
|
| +
|
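| +; recombine: split the 128-bit middle term across the low half (xmm0) and the
|
| +; high half (xmm5) of the 256-bit product
|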
| + vpsrldq xmm4,xmm1,8
|
| + vpxor xmm5,xmm2,xmm4
|
| + vpslldq xmm4,xmm1,8
|
| + vpxor xmm0,xmm3,xmm4
|
| +
|
| + vmovdqa xmm3,XMMWORD[poly]
|
| +
|
| + vmovdqu xmm4,XMMWORD[144+r8]
|
| + vaesenc xmm7,xmm7,xmm4
|
| + vaesenc xmm8,xmm8,xmm4
|
| + vaesenc xmm9,xmm9,xmm4
|
| + vaesenc xmm10,xmm10,xmm4
|
| + vaesenc xmm11,xmm11,xmm4
|
| + vaesenc xmm12,xmm12,xmm4
|
| +
|
| + vmovdqu xmm4,XMMWORD[160+r8]
|
| + vaesenc xmm7,xmm7,xmm4
|
| + vaesenc xmm8,xmm8,xmm4
|
| + vaesenc xmm9,xmm9,xmm4
|
| + vaesenc xmm10,xmm10,xmm4
|
| + vaesenc xmm11,xmm11,xmm4
|
| + vaesenc xmm12,xmm12,xmm4
|
| +
|
| + vmovdqu xmm4,XMMWORD[176+r8]
|
| + vaesenc xmm7,xmm7,xmm4
|
| + vaesenc xmm8,xmm8,xmm4
|
| + vaesenc xmm9,xmm9,xmm4
|
| + vaesenc xmm10,xmm10,xmm4
|
| + vaesenc xmm11,xmm11,xmm4
|
| + vaesenc xmm12,xmm12,xmm4
|
| +
|
| + vmovdqu xmm4,XMMWORD[192+r8]
|
| + vaesenc xmm7,xmm7,xmm4
|
| + vaesenc xmm8,xmm8,xmm4
|
| + vaesenc xmm9,xmm9,xmm4
|
| + vaesenc xmm10,xmm10,xmm4
|
| + vaesenc xmm11,xmm11,xmm4
|
| + vaesenc xmm12,xmm12,xmm4
|
| +
|
| + vmovdqu xmm4,XMMWORD[208+r8]
|
| + vaesenc xmm7,xmm7,xmm4
|
| + vaesenc xmm8,xmm8,xmm4
|
| + vaesenc xmm9,xmm9,xmm4
|
| + vaesenc xmm10,xmm10,xmm4
|
| + vaesenc xmm11,xmm11,xmm4
|
| + vaesenc xmm12,xmm12,xmm4
|
| +
|
| + vmovdqu xmm6,XMMWORD[224+r8]
|
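| +; reduce modulo the POLYVAL polynomial x^128 + x^127 + x^126 + x^121 + 1
|
| +; (two folds against [poly]), interleaved with the final-round XORs below
|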
| + vpalignr xmm2,xmm0,xmm0,8
|
| + vpclmulqdq xmm0,xmm0,xmm3,0x10
|
| + vpxor xmm0,xmm2,xmm0
|
| +
|
| + vpxor xmm4,xmm6,XMMWORD[rdi]
|
| + vaesenclast xmm7,xmm7,xmm4
|
| + vpxor xmm4,xmm6,XMMWORD[16+rdi]
|
| + vaesenclast xmm8,xmm8,xmm4
|
| + vpxor xmm4,xmm6,XMMWORD[32+rdi]
|
| + vaesenclast xmm9,xmm9,xmm4
|
| + vpxor xmm4,xmm6,XMMWORD[48+rdi]
|
| + vaesenclast xmm10,xmm10,xmm4
|
| + vpxor xmm4,xmm6,XMMWORD[64+rdi]
|
| + vaesenclast xmm11,xmm11,xmm4
|
| + vpxor xmm4,xmm6,XMMWORD[80+rdi]
|
| + vaesenclast xmm12,xmm12,xmm4
|
| +
|
| + vpalignr xmm2,xmm0,xmm0,8
|
| + vpclmulqdq xmm0,xmm0,xmm3,0x10
|
| + vpxor xmm0,xmm2,xmm0
|
| +
|
| + vmovdqu XMMWORD[rsi],xmm7
|
| + vmovdqu XMMWORD[16+rsi],xmm8
|
| + vmovdqu XMMWORD[32+rsi],xmm9
|
| + vmovdqu XMMWORD[48+rsi],xmm10
|
| + vmovdqu XMMWORD[64+rsi],xmm11
|
| + vmovdqu XMMWORD[80+rsi],xmm12
|
| +
|
| + vpxor xmm0,xmm0,xmm5
|
| +
|
| + lea rdi,[96+rdi]
|
| + lea rsi,[96+rsi]
|
| + jmp NEAR $L$256_dec_loop1
|
| +
|
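| +; epilogue: fold the last six plaintext blocks into POLYVAL; no more AES work to interleave
|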
| +$L$256_dec_finish_96:
|
| + vmovdqa xmm6,xmm12
|
| + vmovdqa XMMWORD[(16-32)+rax],xmm11
|
| + vmovdqa XMMWORD[(32-32)+rax],xmm10
|
| + vmovdqa XMMWORD[(48-32)+rax],xmm9
|
| + vmovdqa XMMWORD[(64-32)+rax],xmm8
|
| + vmovdqa XMMWORD[(80-32)+rax],xmm7
|
| +
|
| + vmovdqu xmm4,XMMWORD[((0-32))+rcx]
|
| + vpclmulqdq xmm1,xmm6,xmm4,0x10
|
| + vpclmulqdq xmm2,xmm6,xmm4,0x11
|
| + vpclmulqdq xmm3,xmm6,xmm4,0x00
|
| + vpclmulqdq xmm4,xmm6,xmm4,0x01
|
| + vpxor xmm1,xmm1,xmm4
|
| +
|
| + vmovdqu xmm6,XMMWORD[((-16))+rax]
|
| + vmovdqu xmm13,XMMWORD[((-16))+rcx]
|
| +
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x10
|
| + vpxor xmm1,xmm1,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x11
|
| + vpxor xmm2,xmm2,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x00
|
| + vpxor xmm3,xmm3,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x01
|
| + vpxor xmm1,xmm1,xmm4
|
| +
|
| + vmovdqu xmm6,XMMWORD[rax]
|
| + vmovdqu xmm13,XMMWORD[rcx]
|
| +
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x10
|
| + vpxor xmm1,xmm1,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x11
|
| + vpxor xmm2,xmm2,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x00
|
| + vpxor xmm3,xmm3,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x01
|
| + vpxor xmm1,xmm1,xmm4
|
| +
|
| + vmovdqu xmm6,XMMWORD[16+rax]
|
| + vmovdqu xmm13,XMMWORD[16+rcx]
|
| +
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x10
|
| + vpxor xmm1,xmm1,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x11
|
| + vpxor xmm2,xmm2,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x00
|
| + vpxor xmm3,xmm3,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x01
|
| + vpxor xmm1,xmm1,xmm4
|
| +
|
| + vmovdqu xmm6,XMMWORD[32+rax]
|
| + vmovdqu xmm13,XMMWORD[32+rcx]
|
| +
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x10
|
| + vpxor xmm1,xmm1,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x11
|
| + vpxor xmm2,xmm2,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x00
|
| + vpxor xmm3,xmm3,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm13,0x01
|
| + vpxor xmm1,xmm1,xmm4
|
| +
|
| +
|
| + vmovdqu xmm6,XMMWORD[((80-32))+rax]
|
| + vpxor xmm6,xmm6,xmm0
|
| + vmovdqu xmm5,XMMWORD[((80-32))+rcx]
|
| + vpclmulqdq xmm4,xmm6,xmm5,0x11
|
| + vpxor xmm2,xmm2,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm5,0x00
|
| + vpxor xmm3,xmm3,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm5,0x10
|
| + vpxor xmm1,xmm1,xmm4
|
| + vpclmulqdq xmm4,xmm6,xmm5,0x01
|
| + vpxor xmm1,xmm1,xmm4
|
| +
|
| + vpsrldq xmm4,xmm1,8
|
| + vpxor xmm5,xmm2,xmm4
|
| + vpslldq xmm4,xmm1,8
|
| + vpxor xmm0,xmm3,xmm4
|
| +
|
| + vmovdqa xmm3,XMMWORD[poly]
|
| +
|
| + vpalignr xmm2,xmm0,xmm0,8
|
| + vpclmulqdq xmm0,xmm0,xmm3,0x10
|
| + vpxor xmm0,xmm2,xmm0
|
| +
|
| + vpalignr xmm2,xmm0,xmm0,8
|
| + vpclmulqdq xmm0,xmm0,xmm3,0x10
|
| + vpxor xmm0,xmm2,xmm0
|
| +
|
| + vpxor xmm0,xmm0,xmm5
|
| +
|
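| +; tail: decrypt one block at a time; each plaintext block is XORed into the
|
| +; state (xmm0) and multiplied by H (htable[0], at rcx-32) via GFMUL
|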
| +$L$256_dec_loop2:
|
| +
|
| +
|
| +
|
| + cmp r9,16
|
| + jb NEAR $L$256_dec_out
|
| + sub r9,16
|
| +
|
| + vmovdqa xmm2,xmm15
|
| + vpaddd xmm15,xmm15,XMMWORD[one]
|
| +
|
| + vpxor xmm2,xmm2,XMMWORD[r8]
|
| + vaesenc xmm2,xmm2,XMMWORD[16+r8]
|
| + vaesenc xmm2,xmm2,XMMWORD[32+r8]
|
| + vaesenc xmm2,xmm2,XMMWORD[48+r8]
|
| + vaesenc xmm2,xmm2,XMMWORD[64+r8]
|
| + vaesenc xmm2,xmm2,XMMWORD[80+r8]
|
| + vaesenc xmm2,xmm2,XMMWORD[96+r8]
|
| + vaesenc xmm2,xmm2,XMMWORD[112+r8]
|
| + vaesenc xmm2,xmm2,XMMWORD[128+r8]
|
| + vaesenc xmm2,xmm2,XMMWORD[144+r8]
|
| + vaesenc xmm2,xmm2,XMMWORD[160+r8]
|
| + vaesenc xmm2,xmm2,XMMWORD[176+r8]
|
| + vaesenc xmm2,xmm2,XMMWORD[192+r8]
|
| + vaesenc xmm2,xmm2,XMMWORD[208+r8]
|
| + vaesenclast xmm2,xmm2,XMMWORD[224+r8]
|
| + vpxor xmm2,xmm2,XMMWORD[rdi]
|
| + vmovdqu XMMWORD[rsi],xmm2
|
| + add rdi,16
|
| + add rsi,16
|
| +
|
| + vpxor xmm0,xmm0,xmm2
|
| + vmovdqa xmm1,XMMWORD[((-32))+rcx]
|
| + call GFMUL
|
| +
|
| + jmp NEAR $L$256_dec_loop2
|
| +
|
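| +; hand the updated POLYVAL state back to the caller for the tag check
|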
| +$L$256_dec_out:
|
| + vmovdqu XMMWORD[rdx],xmm0
|
| + mov rdi,QWORD[8+rsp] ;WIN64 epilogue
|
| + mov rsi,QWORD[16+rsp]
|
| + DB 0F3h,0C3h ;repret
|
| +
|
| +$L$SEH_end_aes256gcmsiv_dec:
|
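| +; aes256gcmsiv_kdf: the AES-GCM-SIV key-derivation step (RFC 8452).
|
| +; rdi = nonce (first 12 bytes used), rsi = 96 bytes of output, rdx = AES-256
|
| +; key schedule of the master key. Builds six blocks (little-endian counter
|
| +; 0..5 in the low dword, the 96-bit nonce above it) and encrypts them all;
|
| +; the caller assembles the record keys from halves of these outputs.
|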
| +global aes256gcmsiv_kdf
|
| +
|
| +ALIGN 16
|
| +aes256gcmsiv_kdf:
|
| + mov QWORD[8+rsp],rdi ;WIN64 prologue
|
| + mov QWORD[16+rsp],rsi
|
| + mov rax,rsp
|
| +$L$SEH_begin_aes256gcmsiv_kdf:
|
| + mov rdi,rcx
|
| + mov rsi,rdx
|
| + mov rdx,r8
|
| +
|
| +
|
| +
|
| +
|
| +
|
| +
|
| +
|
| + vmovdqa xmm1,XMMWORD[rdx]
|
| + vmovdqa xmm4,XMMWORD[rdi]
|
| + vmovdqa xmm11,XMMWORD[and_mask]
|
| + vmovdqa xmm8,XMMWORD[one]
|
| + vpshufd xmm4,xmm4,0x90
|
| + vpand xmm4,xmm4,xmm11
|
| + vpaddd xmm6,xmm4,xmm8
|
| + vpaddd xmm7,xmm6,xmm8
|
| + vpaddd xmm11,xmm7,xmm8
|
| + vpaddd xmm12,xmm11,xmm8
|
| + vpaddd xmm13,xmm12,xmm8
|
| +
|
| + vpxor xmm4,xmm4,xmm1
|
| + vpxor xmm6,xmm6,xmm1
|
| + vpxor xmm7,xmm7,xmm1
|
| + vpxor xmm11,xmm11,xmm1
|
| + vpxor xmm12,xmm12,xmm1
|
| + vpxor xmm13,xmm13,xmm1
|
| +
|
| + vmovdqa xmm1,XMMWORD[16+rdx]
|
| + vaesenc xmm4,xmm4,xmm1
|
| + vaesenc xmm6,xmm6,xmm1
|
| + vaesenc xmm7,xmm7,xmm1
|
| + vaesenc xmm11,xmm11,xmm1
|
| + vaesenc xmm12,xmm12,xmm1
|
| + vaesenc xmm13,xmm13,xmm1
|
| +
|
| + vmovdqa xmm2,XMMWORD[32+rdx]
|
| + vaesenc xmm4,xmm4,xmm2
|
| + vaesenc xmm6,xmm6,xmm2
|
| + vaesenc xmm7,xmm7,xmm2
|
| + vaesenc xmm11,xmm11,xmm2
|
| + vaesenc xmm12,xmm12,xmm2
|
| + vaesenc xmm13,xmm13,xmm2
|
| +
|
| + vmovdqa xmm1,XMMWORD[48+rdx]
|
| + vaesenc xmm4,xmm4,xmm1
|
| + vaesenc xmm6,xmm6,xmm1
|
| + vaesenc xmm7,xmm7,xmm1
|
| + vaesenc xmm11,xmm11,xmm1
|
| + vaesenc xmm12,xmm12,xmm1
|
| + vaesenc xmm13,xmm13,xmm1
|
| +
|
| + vmovdqa xmm2,XMMWORD[64+rdx]
|
| + vaesenc xmm4,xmm4,xmm2
|
| + vaesenc xmm6,xmm6,xmm2
|
| + vaesenc xmm7,xmm7,xmm2
|
| + vaesenc xmm11,xmm11,xmm2
|
| + vaesenc xmm12,xmm12,xmm2
|
| + vaesenc xmm13,xmm13,xmm2
|
| +
|
| + vmovdqa xmm1,XMMWORD[80+rdx]
|
| + vaesenc xmm4,xmm4,xmm1
|
| + vaesenc xmm6,xmm6,xmm1
|
| + vaesenc xmm7,xmm7,xmm1
|
| + vaesenc xmm11,xmm11,xmm1
|
| + vaesenc xmm12,xmm12,xmm1
|
| + vaesenc xmm13,xmm13,xmm1
|
| +
|
| + vmovdqa xmm2,XMMWORD[96+rdx]
|
| + vaesenc xmm4,xmm4,xmm2
|
| + vaesenc xmm6,xmm6,xmm2
|
| + vaesenc xmm7,xmm7,xmm2
|
| + vaesenc xmm11,xmm11,xmm2
|
| + vaesenc xmm12,xmm12,xmm2
|
| + vaesenc xmm13,xmm13,xmm2
|
| +
|
| + vmovdqa xmm1,XMMWORD[112+rdx]
|
| + vaesenc xmm4,xmm4,xmm1
|
| + vaesenc xmm6,xmm6,xmm1
|
| + vaesenc xmm7,xmm7,xmm1
|
| + vaesenc xmm11,xmm11,xmm1
|
| + vaesenc xmm12,xmm12,xmm1
|
| + vaesenc xmm13,xmm13,xmm1
|
| +
|
| + vmovdqa xmm2,XMMWORD[128+rdx]
|
| + vaesenc xmm4,xmm4,xmm2
|
| + vaesenc xmm6,xmm6,xmm2
|
| + vaesenc xmm7,xmm7,xmm2
|
| + vaesenc xmm11,xmm11,xmm2
|
| + vaesenc xmm12,xmm12,xmm2
|
| + vaesenc xmm13,xmm13,xmm2
|
| +
|
| + vmovdqa xmm1,XMMWORD[144+rdx]
|
| + vaesenc xmm4,xmm4,xmm1
|
| + vaesenc xmm6,xmm6,xmm1
|
| + vaesenc xmm7,xmm7,xmm1
|
| + vaesenc xmm11,xmm11,xmm1
|
| + vaesenc xmm12,xmm12,xmm1
|
| + vaesenc xmm13,xmm13,xmm1
|
| +
|
| + vmovdqa xmm2,XMMWORD[160+rdx]
|
| + vaesenc xmm4,xmm4,xmm2
|
| + vaesenc xmm6,xmm6,xmm2
|
| + vaesenc xmm7,xmm7,xmm2
|
| + vaesenc xmm11,xmm11,xmm2
|
| + vaesenc xmm12,xmm12,xmm2
|
| + vaesenc xmm13,xmm13,xmm2
|
| +
|
| + vmovdqa xmm1,XMMWORD[176+rdx]
|
| + vaesenc xmm4,xmm4,xmm1
|
| + vaesenc xmm6,xmm6,xmm1
|
| + vaesenc xmm7,xmm7,xmm1
|
| + vaesenc xmm11,xmm11,xmm1
|
| + vaesenc xmm12,xmm12,xmm1
|
| + vaesenc xmm13,xmm13,xmm1
|
| +
|
| + vmovdqa xmm2,XMMWORD[192+rdx]
|
| + vaesenc xmm4,xmm4,xmm2
|
| + vaesenc xmm6,xmm6,xmm2
|
| + vaesenc xmm7,xmm7,xmm2
|
| + vaesenc xmm11,xmm11,xmm2
|
| + vaesenc xmm12,xmm12,xmm2
|
| + vaesenc xmm13,xmm13,xmm2
|
| +
|
| + vmovdqa xmm1,XMMWORD[208+rdx]
|
| + vaesenc xmm4,xmm4,xmm1
|
| + vaesenc xmm6,xmm6,xmm1
|
| + vaesenc xmm7,xmm7,xmm1
|
| + vaesenc xmm11,xmm11,xmm1
|
| + vaesenc xmm12,xmm12,xmm1
|
| + vaesenc xmm13,xmm13,xmm1
|
| +
|
| + vmovdqa xmm2,XMMWORD[224+rdx]
|
| + vaesenclast xmm4,xmm4,xmm2
|
| + vaesenclast xmm6,xmm6,xmm2
|
| + vaesenclast xmm7,xmm7,xmm2
|
| + vaesenclast xmm11,xmm11,xmm2
|
| + vaesenclast xmm12,xmm12,xmm2
|
| + vaesenclast xmm13,xmm13,xmm2
|
| +
|
| +
|
| + vmovdqa XMMWORD[rsi],xmm4
|
| + vmovdqa XMMWORD[16+rsi],xmm6
|
| + vmovdqa XMMWORD[32+rsi],xmm7
|
| + vmovdqa XMMWORD[48+rsi],xmm11
|
| + vmovdqa XMMWORD[64+rsi],xmm12
|
| + vmovdqa XMMWORD[80+rsi],xmm13
|
| + mov rdi,QWORD[8+rsp] ;WIN64 epilogue
|
| + mov rsi,QWORD[16+rsp]
|
| + DB 0F3h,0C3h ;repret
|
| +
|
| +$L$SEH_end_aes256gcmsiv_kdf:
|