Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(142)

Unified Diff: third_party/boringssl/win-x86_64/crypto/ec/p256-x86_64-asm.asm

Issue 2219933002: Land BoringSSL roll on master (Closed) Base URL: git@github.com:dart-lang/sdk.git@master
Patch Set: Created 4 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: third_party/boringssl/win-x86_64/crypto/ec/p256-x86_64-asm.asm
diff --git a/third_party/boringssl/win-x86_64/crypto/ec/p256-x86_64-asm.asm b/third_party/boringssl/win-x86_64/crypto/ec/p256-x86_64-asm.asm
new file mode 100644
index 0000000000000000000000000000000000000000..a2e40758194f84bc2b8df35fd20dc611155f170b
--- /dev/null
+++ b/third_party/boringssl/win-x86_64/crypto/ec/p256-x86_64-asm.asm
@@ -0,0 +1,1925 @@
+default rel ; memory operands are RIP-relative unless stated otherwise
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+section .text code align=64
+; XMMWORD/YMMWORD/ZMMWORD are defined empty above, so those size keywords vanish from the operands that use them below.
+EXTERN OPENSSL_ia32cap_P
+; NOTE(review): OPENSSL_ia32cap_P is declared but not referenced anywhere else in this file.
+; Constant pool, placed in the code section. Multi-limb values are stored least-significant qword first.
+ALIGN 64
+$L$poly: ; the modulus p = 2^256 - 2^224 + 2^192 + 2^96 - 1
+ DQ 0xffffffffffffffff,0x00000000ffffffff,0x0000000000000000,0xffffffff00000001
+
+$L$One: ; eight dwords of 1: start value and step of the entry counter in the select routines
+ DD 1,1,1,1,1,1,1,1
+$L$Two: ; not referenced in this file
+ DD 2,2,2,2,2,2,2,2
+$L$Three: ; not referenced in this file
+ DD 3,3,3,3,3,3,3,3
+$L$ONE_mont: ; 2^256 - p, i.e. 2^256 mod p; used by point_add_affine as the Z value substituted when in1 is flagged
+ DQ 0x0000000000000001,0xffffffff00000000,0xffffffffffffffff,0x00000000fffffffe
+; ecp_nistz256_mul_by_2 (Win64: rcx -> res[4], rdx -> a[4]): res = 2*a mod p, p = $L$poly. rdi/rsi are saved in the caller's home slots and reused as res/a pointers.
+; p is subtracted exactly when the 257-bit doubled value is >= p, so the result is < p for every a < p. NOTE(review): this label has no `global` directive and no call site in this file.
+ALIGN 64
+ecp_nistz256_mul_by_2:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_ecp_nistz256_mul_by_2:
+ mov rdi,rcx
+ mov rsi,rdx
+
+
+ push r12
+ push r13
+ xor r13,r13 ; r13 collects the carry out of bit 255 (must be cleared before the add chain starts)
+ mov r8,QWORD[rsi]
+ mov r9,QWORD[8+rsi]
+ add r8,r8 ; r8,r9,r10,r11 = a + a, limb by limb
+ mov r10,QWORD[16+rsi]
+ adc r9,r9
+ mov r11,QWORD[24+rsi]
+ lea rsi,[$L$poly] ; rsi -> p (lea leaves the carry chain intact)
+ mov rax,r8 ; rax,rdx,rcx,r12 keep the unreduced limbs
+ adc r10,r10
+ adc r11,r11
+ mov rdx,r9
+ adc r13,0 ; r13 = bit 256 of 2*a
+
+ sub r8,QWORD[rsi] ; trial subtraction of p
+ mov rcx,r10
+ sbb r9,QWORD[8+rsi]
+ sbb r10,QWORD[16+rsi]
+ mov r12,r11
+ sbb r11,QWORD[24+rsi]
+ sbb r13,0 ; CF = 1 only if the full 257-bit value was smaller than p
+
+ cmovc r8,rax ; 2*a < p: take back the unreduced limbs
+ cmovc r9,rdx
+ mov QWORD[rdi],r8
+ cmovc r10,rcx
+ mov QWORD[8+rdi],r9
+ cmovc r11,r12
+ mov QWORD[16+rdi],r10
+ mov QWORD[24+rdi],r11
+
+ pop r13
+ pop r12
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_ecp_nistz256_mul_by_2:
+; ecp_nistz256_neg (Win64: rcx -> res[4], rdx -> a[4]): res = -a mod p, computed as 0 - a with p ($L$poly) added back when that subtraction borrows.
+; A zero input produces no borrow and is returned as zero. rdi/rsi are saved in the caller's home slots and reused as res/a pointers.
+
+global ecp_nistz256_neg
+
+ALIGN 32
+ecp_nistz256_neg:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_ecp_nistz256_neg:
+ mov rdi,rcx
+ mov rsi,rdx
+
+
+ push r12
+ push r13
+
+ xor r8,r8 ; r8,r9,r10,r11 = 0, r13 = 0 (borrow collector)
+ xor r9,r9
+ xor r10,r10
+ xor r11,r11
+ xor r13,r13
+
+ sub r8,QWORD[rsi] ; r8..r11 = 0 - a
+ sbb r9,QWORD[8+rsi]
+ sbb r10,QWORD[16+rsi]
+ mov rax,r8 ; rax,rdx,rcx,r12 keep 0 - a before p is added
+ sbb r11,QWORD[24+rsi]
+ lea rsi,[$L$poly] ; rsi -> p
+ mov rdx,r9
+ sbb r13,0 ; r13 = -1 if the subtraction borrowed (a != 0), else 0
+
+ add r8,QWORD[rsi] ; r8..r11 = (0 - a) + p
+ mov rcx,r10
+ adc r9,QWORD[8+rsi]
+ adc r10,QWORD[16+rsi]
+ mov r12,r11
+ adc r11,QWORD[24+rsi]
+ test r13,r13 ; ZF = 1 when there was no borrow
+
+ cmovz r8,rax ; no borrow: keep 0 - a (which is 0)
+ cmovz r9,rdx
+ mov QWORD[rdi],r8
+ cmovz r10,rcx
+ mov QWORD[8+rdi],r9
+ cmovz r11,r12
+ mov QWORD[16+rdi],r10
+ mov QWORD[24+rdi],r11
+
+ pop r13
+ pop r12
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_ecp_nistz256_neg:
+
+
+
+; ecp_nistz256_mul_mont (Win64: rcx -> res[4], rdx -> a[4], r8 -> b[4]): exported wrapper around __ecp_nistz256_mul_montq.
+; It remaps the arguments to rdi/rsi/rdx, saves the callee-saved registers the helper overwrites, and preloads the helper's register inputs.
+
+global ecp_nistz256_mul_mont
+
+ALIGN 32
+ecp_nistz256_mul_mont:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_ecp_nistz256_mul_mont:
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+
+
+$L$mul_mont:
+ push rbp
+ push rbx
+ push r12
+ push r13
+ push r14
+ push r15
+ mov rbx,rdx ; rbx -> b
+ mov rax,QWORD[rdx] ; rax = b[0]
+ mov r9,QWORD[rsi] ; r9,r10,r11,r12 = a[0..3]
+ mov r10,QWORD[8+rsi]
+ mov r11,QWORD[16+rsi]
+ mov r12,QWORD[24+rsi]
+
+ call __ecp_nistz256_mul_montq ; stores the product at [rdi]
+$L$mul_mont_done:
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbx
+ pop rbp
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_ecp_nistz256_mul_mont:
+; __ecp_nistz256_mul_montq: internal Montgomery multiply, res = a*b*2^-256 mod p. In: rax = b[0], rbx -> b, rsi -> a, r9,r10,r11,r12 = a[0..3], rdi -> res.
+; Out: r12,r13,r8,r9 = res[0..3], also stored at [rdi]; r14 = poly[1], r15 = poly[3]. Overwrites rax, rbx, rcx, rdx, rbp, r10, r11 and the flags.
+ALIGN 32
+__ecp_nistz256_mul_montq:
+; Pass 0: acc = a * b[0], collected in r8,r9,r10,r11,r12 (least significant limb first).
+
+ mov rbp,rax ; rbp = b[0], reloaded into rax before every mul
+ mul r9
+ mov r14,QWORD[(($L$poly+8))] ; r14 = poly[1]
+ mov r8,rax
+ mov rax,rbp
+ mov r9,rdx
+
+ mul r10
+ mov r15,QWORD[(($L$poly+24))] ; r15 = poly[3]
+ add r9,rax
+ mov rax,rbp
+ adc rdx,0
+ mov r10,rdx
+
+ mul r11
+ add r10,rax
+ mov rax,rbp
+ adc rdx,0
+ mov r11,rdx
+
+ mul r12
+ add r11,rax
+ mov rax,r8 ; rax = acc[0], the multiplier for the reduction step
+ adc rdx,0
+ xor r13,r13
+ mov r12,rdx
+
+; Reduction step (repeated after each pass): add acc[0]*p so that limb 0 cancels, then drop that limb.
+; Since p = 2^96 - 1 + poly[3]*2^192, acc[0]*p + acc[0] = acc[0]*2^96 + acc[0]*poly[3]*2^192:
+; the shl/shr pair adds acc[0]*2^96 into limbs 1 and 2, and mul r15 adds acc[0]*poly[3] into limbs 3 and 4.
+
+
+
+
+
+
+
+ mov rbp,r8
+ shl r8,32
+ mul r15
+ shr rbp,32
+ add r9,r8
+ adc r10,rbp
+ adc r11,rax
+ mov rax,QWORD[8+rbx] ; rax = b[1] for the next pass
+ adc r12,rdx
+ adc r13,0
+ xor r8,r8 ; r8 becomes the new top limb
+
+; Pass 1: acc (now r9,r10,r11,r12,r13,r8) += a * b[1]; a is read from memory at [rsi].
+
+ mov rbp,rax
+ mul QWORD[rsi]
+ add r9,rax
+ mov rax,rbp
+ adc rdx,0
+ mov rcx,rdx
+
+ mul QWORD[8+rsi]
+ add r10,rcx
+ adc rdx,0
+ add r10,rax
+ mov rax,rbp
+ adc rdx,0
+ mov rcx,rdx
+
+ mul QWORD[16+rsi]
+ add r11,rcx
+ adc rdx,0
+ add r11,rax
+ mov rax,rbp
+ adc rdx,0
+ mov rcx,rdx
+
+ mul QWORD[24+rsi]
+ add r12,rcx
+ adc rdx,0
+ add r12,rax
+ mov rax,r9
+ adc r13,rdx
+ adc r8,0
+
+; Reduction step on acc[0] = r9.
+
+ mov rbp,r9
+ shl r9,32
+ mul r15
+ shr rbp,32
+ add r10,r9
+ adc r11,rbp
+ adc r12,rax
+ mov rax,QWORD[16+rbx] ; rax = b[2]
+ adc r13,rdx
+ adc r8,0
+ xor r9,r9
+
+; Pass 2: acc (now r10,r11,r12,r13,r8,r9) += a * b[2].
+
+ mov rbp,rax
+ mul QWORD[rsi]
+ add r10,rax
+ mov rax,rbp
+ adc rdx,0
+ mov rcx,rdx
+
+ mul QWORD[8+rsi]
+ add r11,rcx
+ adc rdx,0
+ add r11,rax
+ mov rax,rbp
+ adc rdx,0
+ mov rcx,rdx
+
+ mul QWORD[16+rsi]
+ add r12,rcx
+ adc rdx,0
+ add r12,rax
+ mov rax,rbp
+ adc rdx,0
+ mov rcx,rdx
+
+ mul QWORD[24+rsi]
+ add r13,rcx
+ adc rdx,0
+ add r13,rax
+ mov rax,r10
+ adc r8,rdx
+ adc r9,0
+
+; Reduction step on acc[0] = r10.
+
+ mov rbp,r10
+ shl r10,32
+ mul r15
+ shr rbp,32
+ add r11,r10
+ adc r12,rbp
+ adc r13,rax
+ mov rax,QWORD[24+rbx] ; rax = b[3]
+ adc r8,rdx
+ adc r9,0
+ xor r10,r10
+
+; Pass 3: acc (now r11,r12,r13,r8,r9,r10) += a * b[3].
+
+ mov rbp,rax
+ mul QWORD[rsi]
+ add r11,rax
+ mov rax,rbp
+ adc rdx,0
+ mov rcx,rdx
+
+ mul QWORD[8+rsi]
+ add r12,rcx
+ adc rdx,0
+ add r12,rax
+ mov rax,rbp
+ adc rdx,0
+ mov rcx,rdx
+
+ mul QWORD[16+rsi]
+ add r13,rcx
+ adc rdx,0
+ add r13,rax
+ mov rax,rbp
+ adc rdx,0
+ mov rcx,rdx
+
+ mul QWORD[24+rsi]
+ add r8,rcx
+ adc rdx,0
+ add r8,rax
+ mov rax,r11
+ adc r9,rdx
+ adc r10,0
+
+; Last reduction step on acc[0] = r11; copies of the result limbs are taken along the way (rcx, rbp here, rbx, rdx below).
+
+ mov rbp,r11
+ shl r11,32
+ mul r15
+ shr rbp,32
+ add r12,r11
+ adc r13,rbp
+ mov rcx,r12
+ adc r8,rax
+ adc r9,rdx
+ mov rbp,r13
+ adc r10,0
+
+; Final correction: subtract p from the five-limb value r12,r13,r8,r9,r10 and undo it if that borrows.
+
+ sub r12,-1 ; poly[0] is all ones
+ mov rbx,r8 ; note: rbx (the b pointer) is overwritten here
+ sbb r13,r14
+ sbb r8,0 ; poly[2] is zero
+ mov rdx,r9
+ sbb r9,r15
+ sbb r10,0
+
+ cmovc r12,rcx
+ cmovc r13,rbp
+ mov QWORD[rdi],r12
+ cmovc r8,rbx
+ mov QWORD[8+rdi],r13
+ cmovc r9,rdx
+ mov QWORD[16+rdi],r8
+ mov QWORD[24+rdi],r9
+
+ DB 0F3h,0C3h ;repret
+
+
+
+
+
+
+; ecp_nistz256_sqr_mont (Win64: rcx -> res[4], rdx -> a[4]): exported wrapper around __ecp_nistz256_sqr_montq.
+; It remaps the arguments to rdi/rsi, saves the callee-saved registers the helper overwrites, and preloads a[0..3] into rax, r14, r15, r8.
+
+global ecp_nistz256_sqr_mont
+
+ALIGN 32
+ecp_nistz256_sqr_mont:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_ecp_nistz256_sqr_mont:
+ mov rdi,rcx
+ mov rsi,rdx
+
+
+ push rbp
+ push rbx
+ push r12
+ push r13
+ push r14
+ push r15
+ mov rax,QWORD[rsi] ; rax,r14,r15,r8 = a[0..3]
+ mov r14,QWORD[8+rsi]
+ mov r15,QWORD[16+rsi]
+ mov r8,QWORD[24+rsi]
+
+ call __ecp_nistz256_sqr_montq ; stores the square at [rdi]
+$L$sqr_mont_done:
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbx
+ pop rbp
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_ecp_nistz256_sqr_mont:
+; __ecp_nistz256_sqr_montq: internal Montgomery square, res = a*a*2^-256 mod p. In: rax,r14,r15,r8 = a[0..3], rsi -> a, rdi -> res.
+; Out: r12,r13,r14,r15 = res[0..3], also stored at [rdi]; rsi = poly[1], rbp = poly[3]. Overwrites rax, rcx, rdx, r8-r11 and the flags.
+ALIGN 32
+__ecp_nistz256_sqr_montq:
+ mov r13,rax ; r13 = a[0]
+ mul r14 ; a[1]*a[0]
+ mov r9,rax
+ mov rax,r15
+ mov r10,rdx
+
+ mul r13 ; a[2]*a[0]
+ add r10,rax
+ mov rax,r8
+ adc rdx,0
+ mov r11,rdx
+
+ mul r13 ; a[3]*a[0]
+ add r11,rax
+ mov rax,r15
+ adc rdx,0
+ mov r12,rdx
+
+
+ mul r14 ; a[2]*a[1]
+ add r11,rax
+ mov rax,r8
+ adc rdx,0
+ mov rbp,rdx
+
+ mul r14 ; a[3]*a[1]
+ add r12,rax
+ mov rax,r8
+ adc rdx,0
+ add r12,rbp
+ mov r13,rdx
+ adc r13,0
+
+
+ mul r15 ; a[3]*a[2]
+ xor r15,r15
+ add r13,rax
+ mov rax,QWORD[rsi] ; rax = a[0] again, for the squares below
+ mov r14,rdx
+ adc r14,0
+
+ add r9,r9 ; double the cross products held in r9..r14; r15 takes the carry
+ adc r10,r10
+ adc r11,r11
+ adc r12,r12
+ adc r13,r13
+ adc r14,r14
+ adc r15,0
+
+ mul rax ; a[0]^2; its low limb becomes r8
+ mov r8,rax
+ mov rax,QWORD[8+rsi]
+ mov rcx,rdx
+
+ mul rax ; a[1]^2
+ add r9,rcx
+ adc r10,rax
+ mov rax,QWORD[16+rsi]
+ adc rdx,0
+ mov rcx,rdx
+
+ mul rax ; a[2]^2
+ add r11,rcx
+ adc r12,rax
+ mov rax,QWORD[24+rsi]
+ adc rdx,0
+ mov rcx,rdx
+
+ mul rax ; a[3]^2
+ add r13,rcx
+ adc r14,rax
+ mov rax,r8
+ adc r15,rdx
+
+ mov rsi,QWORD[(($L$poly+8))] ; rsi = poly[1] (the a pointer is no longer needed)
+ mov rbp,QWORD[(($L$poly+24))] ; rbp = poly[3]
+
+; The eight-limb square is now in r8..r15. Four reduction steps follow on the low half; each adds
+; (low limb)*p so that the low limb cancels: the shl/shr pair contributes limb*2^96, mul rbp contributes limb*poly[3]*2^192.
+
+ mov rcx,r8
+ shl r8,32
+ mul rbp
+ shr rcx,32
+ add r9,r8
+ adc r10,rcx
+ adc r11,rax
+ mov rax,r9
+ adc rdx,0 ; rdx = new top limb, moved into r8 by the next step
+
+
+
+ mov rcx,r9
+ shl r9,32
+ mov r8,rdx
+ mul rbp
+ shr rcx,32
+ add r10,r9
+ adc r11,rcx
+ adc r8,rax
+ mov rax,r10
+ adc rdx,0
+
+
+
+ mov rcx,r10
+ shl r10,32
+ mov r9,rdx
+ mul rbp
+ shr rcx,32
+ add r11,r10
+ adc r8,rcx
+ adc r9,rax
+ mov rax,r11
+ adc rdx,0
+
+
+
+ mov rcx,r11
+ shl r11,32
+ mov r10,rdx
+ mul rbp
+ shr rcx,32
+ add r8,r11
+ adc r9,rcx
+ adc r10,rax
+ adc rdx,0
+ xor r11,r11
+
+; Add the reduced low half (r8,r9,r10,rdx) to the high half (r12..r15); r11 takes the carry. Copies go to r8,r9,r10,rcx.
+
+ add r12,r8
+ adc r13,r9
+ mov r8,r12
+ adc r14,r10
+ adc r15,rdx
+ mov r9,r13
+ adc r11,0
+
+ sub r12,-1 ; final correction: subtract p, undo if that borrows
+ mov r10,r14
+ sbb r13,rsi
+ sbb r14,0
+ mov rcx,r15
+ sbb r15,rbp
+ sbb r11,0
+
+ cmovc r12,r8
+ cmovc r13,r9
+ mov QWORD[rdi],r12
+ cmovc r14,r10
+ mov QWORD[8+rdi],r13
+ cmovc r15,rcx
+ mov QWORD[16+rdi],r14
+ mov QWORD[24+rdi],r15
+
+ DB 0F3h,0C3h ;repret
+
+
+
+
+; ecp_nistz256_from_mont (Win64: rcx -> res[4], rdx -> a[4]): res = a*2^-256 mod p.
+; Runs four reduction steps on a (no multiplication by a second operand), then one conditional subtraction of p.
+; Each step adds (low limb)*p so that the low limb cancels: shl/shr contribute limb*2^96, mul r13 contributes limb*poly[3]*2^192.
+global ecp_nistz256_from_mont
+
+ALIGN 32
+ecp_nistz256_from_mont:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_ecp_nistz256_from_mont:
+ mov rdi,rcx
+ mov rsi,rdx
+
+
+ push r12
+ push r13
+
+ mov rax,QWORD[rsi] ; rax = r8 = a[0]; r9,r10,r11 = a[1..3]
+ mov r13,QWORD[(($L$poly+24))] ; r13 = poly[3]
+ mov r9,QWORD[8+rsi]
+ mov r10,QWORD[16+rsi]
+ mov r11,QWORD[24+rsi]
+ mov r8,rax
+ mov r12,QWORD[(($L$poly+8))] ; r12 = poly[1]
+
+
+
+ mov rcx,rax
+ shl r8,32
+ mul r13
+ shr rcx,32
+ add r9,r8
+ adc r10,rcx
+ adc r11,rax
+ mov rax,r9
+ adc rdx,0 ; rdx = new top limb, moved into r8 by the next step
+
+
+
+ mov rcx,r9
+ shl r9,32
+ mov r8,rdx
+ mul r13
+ shr rcx,32
+ add r10,r9
+ adc r11,rcx
+ adc r8,rax
+ mov rax,r10
+ adc rdx,0
+
+
+
+ mov rcx,r10
+ shl r10,32
+ mov r9,rdx
+ mul r13
+ shr rcx,32
+ add r11,r10
+ adc r8,rcx
+ adc r9,rax
+ mov rax,r11
+ adc rdx,0
+
+
+
+ mov rcx,r11
+ shl r11,32
+ mov r10,rdx
+ mul r13
+ shr rcx,32
+ add r8,r11
+ adc r9,rcx
+ mov rcx,r8 ; copies of the unsubtracted limbs: rcx, rsi, rax, r11
+ adc r10,rax
+ mov rsi,r9
+ adc rdx,0
+
+ sub r8,-1 ; subtract p from r8,r9,r10,rdx
+ mov rax,r10
+ sbb r9,r12
+ sbb r10,0
+ mov r11,rdx
+ sbb rdx,r13
+ sbb r13,r13 ; r13 = -borrow; ZF = 1 when the subtraction did not borrow
+
+ cmovnz r8,rcx ; borrow: restore the unsubtracted limbs
+ cmovnz r9,rsi
+ mov QWORD[rdi],r8
+ cmovnz r10,rax
+ mov QWORD[8+rdi],r9
+ cmovz r11,rdx ; no borrow: r11 takes the subtracted top limb (it already holds the unsubtracted one)
+ mov QWORD[16+rdi],r10
+ mov QWORD[24+rdi],r11
+
+ pop r13
+ pop r12
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_ecp_nistz256_from_mont:
+; ecp_nistz256_select_w5 (Win64: rcx -> out, rdx -> table, r8d = index): copies the 96-byte table entry whose 1-based position equals index into out.
+; All 16 entries are read and masked regardless of index; an index matching no entry (e.g. 0) leaves out all zero. The table is read with movdqa (16-byte aligned loads).
+global ecp_nistz256_select_w5
+; The DB lines after the first label are hand-encoded instructions: they allocate 168 bytes and save xmm6-xmm15 there.
+ALIGN 32
+ecp_nistz256_select_w5:
+ lea rax,[((-136))+rsp]
+$L$SEH_begin_ecp_nistz256_select_w5:
+DB 0x48,0x8d,0x60,0xe0 ; lea rsp,[rax-0x20]  (rsp = entry rsp - 168)
+DB 0x0f,0x29,0x70,0xe0 ; movaps [rax-0x20],xmm6
+DB 0x0f,0x29,0x78,0xf0 ; movaps [rax-0x10],xmm7
+DB 0x44,0x0f,0x29,0x00 ; movaps [rax],xmm8
+DB 0x44,0x0f,0x29,0x48,0x10 ; movaps [rax+0x10],xmm9
+DB 0x44,0x0f,0x29,0x50,0x20 ; movaps [rax+0x20],xmm10
+DB 0x44,0x0f,0x29,0x58,0x30 ; movaps [rax+0x30],xmm11
+DB 0x44,0x0f,0x29,0x60,0x40 ; movaps [rax+0x40],xmm12
+DB 0x44,0x0f,0x29,0x68,0x50 ; movaps [rax+0x50],xmm13
+DB 0x44,0x0f,0x29,0x70,0x60 ; movaps [rax+0x60],xmm14
+DB 0x44,0x0f,0x29,0x78,0x70 ; movaps [rax+0x70],xmm15
+ movdqa xmm0,XMMWORD[$L$One] ; xmm0 = counter step (1 in every dword)
+ movd xmm1,r8d
+
+ pxor xmm2,xmm2 ; xmm2..xmm7 accumulate the selected 96 bytes
+ pxor xmm3,xmm3
+ pxor xmm4,xmm4
+ pxor xmm5,xmm5
+ pxor xmm6,xmm6
+ pxor xmm7,xmm7
+
+ movdqa xmm8,xmm0 ; xmm8 = entry counter, starting at 1
+ pshufd xmm1,xmm1,0 ; broadcast index to all four dwords
+
+ mov rax,16 ; 16 table entries
+$L$select_loop_sse_w5:
+
+ movdqa xmm15,xmm8
+ paddd xmm8,xmm0
+ pcmpeqd xmm15,xmm1 ; xmm15 = all ones if this entry's number equals index, else zero
+
+ movdqa xmm9,XMMWORD[rdx]
+ movdqa xmm10,XMMWORD[16+rdx]
+ movdqa xmm11,XMMWORD[32+rdx]
+ movdqa xmm12,XMMWORD[48+rdx]
+ movdqa xmm13,XMMWORD[64+rdx]
+ movdqa xmm14,XMMWORD[80+rdx]
+ lea rdx,[96+rdx] ; advance to the next entry
+
+ pand xmm9,xmm15
+ pand xmm10,xmm15
+ por xmm2,xmm9
+ pand xmm11,xmm15
+ por xmm3,xmm10
+ pand xmm12,xmm15
+ por xmm4,xmm11
+ pand xmm13,xmm15
+ por xmm5,xmm12
+ pand xmm14,xmm15
+ por xmm6,xmm13
+ por xmm7,xmm14
+
+ dec rax
+ jnz NEAR $L$select_loop_sse_w5
+
+ movdqu XMMWORD[rcx],xmm2
+ movdqu XMMWORD[16+rcx],xmm3
+ movdqu XMMWORD[32+rcx],xmm4
+ movdqu XMMWORD[48+rcx],xmm5
+ movdqu XMMWORD[64+rcx],xmm6
+ movdqu XMMWORD[80+rcx],xmm7
+ movaps xmm6,XMMWORD[rsp] ; restore xmm6-xmm15 from the slots written in the prologue
+ movaps xmm7,XMMWORD[16+rsp]
+ movaps xmm8,XMMWORD[32+rsp]
+ movaps xmm9,XMMWORD[48+rsp]
+ movaps xmm10,XMMWORD[64+rsp]
+ movaps xmm11,XMMWORD[80+rsp]
+ movaps xmm12,XMMWORD[96+rsp]
+ movaps xmm13,XMMWORD[112+rsp]
+ movaps xmm14,XMMWORD[128+rsp]
+ movaps xmm15,XMMWORD[144+rsp]
+ lea rsp,[168+rsp]
+$L$SEH_end_ecp_nistz256_select_w5:
+ DB 0F3h,0C3h ;repret
+; ecp_nistz256_select_w7 (Win64: rcx -> out, rdx -> table, r8d = index): copies the 64-byte table entry whose 1-based position equals index into out.
+; All 64 entries are read and masked regardless of index; an index matching no entry (e.g. 0) leaves out all zero. The table is read with movdqa (16-byte aligned loads).
+
+; The DB lines after the first label are hand-encoded instructions: they allocate 168 bytes and save xmm6-xmm15 there.
+global ecp_nistz256_select_w7
+
+ALIGN 32
+ecp_nistz256_select_w7:
+ lea rax,[((-136))+rsp]
+$L$SEH_begin_ecp_nistz256_select_w7:
+DB 0x48,0x8d,0x60,0xe0 ; lea rsp,[rax-0x20]  (rsp = entry rsp - 168)
+DB 0x0f,0x29,0x70,0xe0 ; movaps [rax-0x20],xmm6
+DB 0x0f,0x29,0x78,0xf0 ; movaps [rax-0x10],xmm7
+DB 0x44,0x0f,0x29,0x00 ; movaps [rax],xmm8
+DB 0x44,0x0f,0x29,0x48,0x10 ; movaps [rax+0x10],xmm9
+DB 0x44,0x0f,0x29,0x50,0x20 ; movaps [rax+0x20],xmm10
+DB 0x44,0x0f,0x29,0x58,0x30 ; movaps [rax+0x30],xmm11
+DB 0x44,0x0f,0x29,0x60,0x40 ; movaps [rax+0x40],xmm12
+DB 0x44,0x0f,0x29,0x68,0x50 ; movaps [rax+0x50],xmm13
+DB 0x44,0x0f,0x29,0x70,0x60 ; movaps [rax+0x60],xmm14
+DB 0x44,0x0f,0x29,0x78,0x70 ; movaps [rax+0x70],xmm15
+ movdqa xmm8,XMMWORD[$L$One] ; xmm8 = entry counter, starting at 1
+ movd xmm1,r8d
+
+ pxor xmm2,xmm2 ; xmm2..xmm5 accumulate the selected 64 bytes
+ pxor xmm3,xmm3
+ pxor xmm4,xmm4
+ pxor xmm5,xmm5
+
+ movdqa xmm0,xmm8 ; xmm0 = counter step (1 in every dword)
+ pshufd xmm1,xmm1,0 ; broadcast index to all four dwords
+ mov rax,64 ; 64 table entries
+
+$L$select_loop_sse_w7:
+ movdqa xmm15,xmm8
+ paddd xmm8,xmm0
+ movdqa xmm9,XMMWORD[rdx]
+ movdqa xmm10,XMMWORD[16+rdx]
+ pcmpeqd xmm15,xmm1 ; xmm15 = all ones if this entry's number equals index, else zero
+ movdqa xmm11,XMMWORD[32+rdx]
+ movdqa xmm12,XMMWORD[48+rdx]
+ lea rdx,[64+rdx] ; advance to the next entry
+
+ pand xmm9,xmm15
+ pand xmm10,xmm15
+ por xmm2,xmm9
+ pand xmm11,xmm15
+ por xmm3,xmm10
+ pand xmm12,xmm15
+ por xmm4,xmm11
+ prefetcht0 [255+rdx] ; prefetch hint for table data ahead of the current position
+ por xmm5,xmm12
+
+ dec rax
+ jnz NEAR $L$select_loop_sse_w7
+
+ movdqu XMMWORD[rcx],xmm2
+ movdqu XMMWORD[16+rcx],xmm3
+ movdqu XMMWORD[32+rcx],xmm4
+ movdqu XMMWORD[48+rcx],xmm5
+ movaps xmm6,XMMWORD[rsp] ; restore xmm6-xmm15 from the slots written in the prologue
+ movaps xmm7,XMMWORD[16+rsp]
+ movaps xmm8,XMMWORD[32+rsp]
+ movaps xmm9,XMMWORD[48+rsp]
+ movaps xmm10,XMMWORD[64+rsp]
+ movaps xmm11,XMMWORD[80+rsp]
+ movaps xmm12,XMMWORD[96+rsp]
+ movaps xmm13,XMMWORD[112+rsp]
+ movaps xmm14,XMMWORD[128+rsp]
+ movaps xmm15,XMMWORD[144+rsp]
+ lea rsp,[168+rsp]
+$L$SEH_end_ecp_nistz256_select_w7:
+ DB 0F3h,0C3h ;repret
+; ecp_nistz256_avx2_select_w7: stub. After the Win64 argument remap it executes the bytes 0F 0B (ud2, an invalid-opcode trap), so the instructions after it are never reached.
+global ecp_nistz256_avx2_select_w7
+
+ALIGN 32
+ecp_nistz256_avx2_select_w7:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_ecp_nistz256_avx2_select_w7:
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+
+
+DB 0x0f,0x0b ; ud2
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_ecp_nistz256_avx2_select_w7:
+; __ecp_nistz256_add_toq: res = a + b mod p. In: r12,r13,r8,r9 = a[0..3], rbx -> b, r14 = poly[1], r15 = poly[3], rdi -> res. Out: r12,r13,r8,r9 = res, also stored at [rdi]. Overwrites rax, rbp, rcx, r10, r11, flags. p is subtracted exactly when the 257-bit sum is >= p.
+ALIGN 32
+__ecp_nistz256_add_toq:
+ xor r11,r11 ; r11 collects the carry out of bit 255
+ add r12,QWORD[rbx]
+ adc r13,QWORD[8+rbx]
+ mov rax,r12 ; rax,rbp,rcx,r10 keep the unreduced sum
+ adc r8,QWORD[16+rbx]
+ adc r9,QWORD[24+rbx]
+ mov rbp,r13
+ adc r11,0 ; r11 = bit 256 of the sum
+ sub r12,-1 ; trial subtraction of p (poly[0] is all ones)
+ mov rcx,r8
+ sbb r13,r14
+ sbb r8,0 ; poly[2] is zero
+ mov r10,r9
+ sbb r9,r15
+ sbb r11,0 ; CF = 1 only if the full 257-bit sum was smaller than p
+
+ cmovc r12,rax ; sum < p: take back the unreduced limbs
+ cmovc r13,rbp
+ mov QWORD[rdi],r12
+ cmovc r8,rcx
+ mov QWORD[8+rdi],r13
+ cmovc r9,r10
+ mov QWORD[16+rdi],r8
+ mov QWORD[24+rdi],r9
+
+ DB 0F3h,0C3h ;repret
+; __ecp_nistz256_sub_fromq: res = a - b mod p. In: r12,r13,r8,r9 = a[0..3], rbx -> b, r14 = poly[1], r15 = poly[3], rdi -> res.
+; Out: r12,r13,r8,r9 = res, also stored at [rdi]. Overwrites rax, rbp, rcx, r10, r11 and the flags. p is added back only when a - b borrows.
+
+ALIGN 32
+__ecp_nistz256_sub_fromq:
+ sub r12,QWORD[rbx]
+ sbb r13,QWORD[8+rbx]
+ mov rax,r12 ; rax,rbp,rcx,r10 keep a - b before p is added
+ sbb r8,QWORD[16+rbx]
+ sbb r9,QWORD[24+rbx]
+ mov rbp,r13
+ sbb r11,r11 ; r11 = -1 if a - b borrowed, else 0
+
+ add r12,-1 ; add p (poly[0] is all ones)
+ mov rcx,r8
+ adc r13,r14
+ adc r8,0 ; poly[2] is zero
+ mov r10,r9
+ adc r9,r15
+ test r11,r11 ; ZF = 1 when there was no borrow
+
+ cmovz r12,rax ; no borrow: keep a - b
+ cmovz r13,rbp
+ mov QWORD[rdi],r12
+ cmovz r8,rcx
+ mov QWORD[8+rdi],r13
+ cmovz r9,r10
+ mov QWORD[16+rdi],r8
+ mov QWORD[24+rdi],r9
+
+ DB 0F3h,0C3h ;repret
+; __ecp_nistz256_subq: r12,r13,r8,r9 = a - b mod p, registers only (nothing is stored). In: rax,rbp,rcx,r10 = a[0..3], r12,r13,r8,r9 = b[0..3], r14 = poly[1], r15 = poly[3].
+; Overwrites rax, rbp, rcx, r10, r11 and the flags. p is added back only when a - b borrows.
+
+ALIGN 32
+__ecp_nistz256_subq:
+ sub rax,r12
+ sbb rbp,r13
+ mov r12,rax ; r12,r13,r8,r9 take a - b
+ sbb rcx,r8
+ sbb r10,r9
+ mov r13,rbp
+ sbb r11,r11 ; r11 = -1 if a - b borrowed, else 0
+
+ add rax,-1 ; rax,rbp,rcx,r10 = (a - b) + p
+ mov r8,rcx
+ adc rbp,r14
+ adc rcx,0
+ mov r9,r10
+ adc r10,r15
+ test r11,r11
+
+ cmovnz r12,rax ; borrow: take the value with p added
+ cmovnz r13,rbp
+ cmovnz r8,rcx
+ cmovnz r9,r10
+
+ DB 0F3h,0C3h ;repret
+; __ecp_nistz256_mul_by_2q: res = 2*a mod p. In: r12,r13,r8,r9 = a[0..3], r14 = poly[1], r15 = poly[3], rdi -> res. Out: r12,r13,r8,r9 = res, also stored at [rdi].
+; Overwrites rax, rbp, rcx, r10, r11 and the flags. p is subtracted exactly when the 257-bit doubled value is >= p.
+ALIGN 32
+__ecp_nistz256_mul_by_2q:
+ xor r11,r11 ; r11 collects the carry out of bit 255
+ add r12,r12
+ adc r13,r13
+ mov rax,r12 ; rax,rbp,rcx,r10 keep the unreduced limbs
+ adc r8,r8
+ adc r9,r9
+ mov rbp,r13
+ adc r11,0 ; r11 = bit 256 of 2*a
+
+ sub r12,-1 ; trial subtraction of p (poly[0] is all ones)
+ mov rcx,r8
+ sbb r13,r14
+ sbb r8,0 ; poly[2] is zero
+ mov r10,r9
+ sbb r9,r15
+ sbb r11,0 ; CF = 1 only if the full 257-bit value was smaller than p
+
+ cmovc r12,rax ; 2*a < p: take back the unreduced limbs
+ cmovc r13,rbp
+ mov QWORD[rdi],r12
+ cmovc r8,rcx
+ mov QWORD[8+rdi],r13
+ cmovc r9,r10
+ mov QWORD[16+rdi],r8
+ mov QWORD[24+rdi],r9
+
+ DB 0F3h,0C3h ;repret
+; ecp_nistz256_point_double (Win64: rcx -> res, rdx -> in): doubles the point at `in`. A point is three 32-byte values at offsets 0, 32, 64, called x, y, z below.
+global ecp_nistz256_point_double
+; Frame below the six pushes: 32*5+8 bytes, used as 32-byte slots named here S=[rsp+0], M=[rsp+32], Zsqr=[rsp+64], in_x=[rsp+96], tmp0=[rsp+128].
+ALIGN 32
+ecp_nistz256_point_double:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_ecp_nistz256_point_double:
+ mov rdi,rcx
+ mov rsi,rdx
+; Pointers to res_x, res_y, res_z are parked in xmm0, xmm1, xmm2 and fetched back with movq when needed (the DB lines are hand-encoded movq instructions).
+; $L$point_double_shortcutq is also entered from ecp_nistz256_point_add with rdi -> res, rsi -> in and rsp already adjusted to this frame size.
+ push rbp
+ push rbx
+ push r12
+ push r13
+ push r14
+ push r15
+ sub rsp,32*5+8
+
+$L$point_double_shortcutq:
+ movdqu xmm0,XMMWORD[rsi] ; xmm0:xmm1 = in_x
+ mov rbx,rsi ; rbx -> in
+ movdqu xmm1,XMMWORD[16+rsi]
+ mov r12,QWORD[((32+0))+rsi] ; r12,r13,r8,r9 = in_y
+ mov r13,QWORD[((32+8))+rsi]
+ mov r8,QWORD[((32+16))+rsi]
+ mov r9,QWORD[((32+24))+rsi]
+ mov r14,QWORD[(($L$poly+8))] ; r14 = poly[1], r15 = poly[3] for the add/sub helpers
+ mov r15,QWORD[(($L$poly+24))]
+ movdqa XMMWORD[96+rsp],xmm0 ; in_x slot = copy of in_x
+ movdqa XMMWORD[(96+16)+rsp],xmm1
+ lea r10,[32+rdi]
+ lea r11,[64+rdi]
+DB 102,72,15,110,199 ; movq xmm0,rdi  (-> res_x)
+DB 102,73,15,110,202 ; movq xmm1,r10  (-> res_y)
+DB 102,73,15,110,211 ; movq xmm2,r11  (-> res_z)
+
+ lea rdi,[rsp]
+ call __ecp_nistz256_mul_by_2q ; S = 2*in_y
+
+ mov rax,QWORD[((64+0))+rsi]
+ mov r14,QWORD[((64+8))+rsi]
+ mov r15,QWORD[((64+16))+rsi]
+ mov r8,QWORD[((64+24))+rsi]
+ lea rsi,[((64-0))+rsi]
+ lea rdi,[64+rsp]
+ call __ecp_nistz256_sqr_montq ; Zsqr = in_z^2
+
+ mov rax,QWORD[((0+0))+rsp]
+ mov r14,QWORD[((8+0))+rsp]
+ lea rsi,[((0+0))+rsp]
+ mov r15,QWORD[((16+0))+rsp]
+ mov r8,QWORD[((24+0))+rsp]
+ lea rdi,[rsp]
+ call __ecp_nistz256_sqr_montq ; S = S^2
+
+ mov rax,QWORD[32+rbx]
+ mov r9,QWORD[((64+0))+rbx]
+ mov r10,QWORD[((64+8))+rbx]
+ mov r11,QWORD[((64+16))+rbx]
+ mov r12,QWORD[((64+24))+rbx]
+ lea rsi,[((64-0))+rbx]
+ lea rbx,[32+rbx]
+DB 102,72,15,126,215 ; movq rdi,xmm2  (-> res_z)
+ call __ecp_nistz256_mul_montq ; res_z = in_z*in_y
+ call __ecp_nistz256_mul_by_2q ; res_z = 2*res_z
+
+ mov r12,QWORD[((96+0))+rsp]
+ mov r13,QWORD[((96+8))+rsp]
+ lea rbx,[64+rsp]
+ mov r8,QWORD[((96+16))+rsp]
+ mov r9,QWORD[((96+24))+rsp]
+ lea rdi,[32+rsp]
+ call __ecp_nistz256_add_toq ; M = in_x + Zsqr
+
+ mov r12,QWORD[((96+0))+rsp]
+ mov r13,QWORD[((96+8))+rsp]
+ lea rbx,[64+rsp]
+ mov r8,QWORD[((96+16))+rsp]
+ mov r9,QWORD[((96+24))+rsp]
+ lea rdi,[64+rsp]
+ call __ecp_nistz256_sub_fromq ; Zsqr = in_x - Zsqr
+
+ mov rax,QWORD[((0+0))+rsp]
+ mov r14,QWORD[((8+0))+rsp]
+ lea rsi,[((0+0))+rsp]
+ mov r15,QWORD[((16+0))+rsp]
+ mov r8,QWORD[((24+0))+rsp]
+DB 102,72,15,126,207 ; movq rdi,xmm1  (-> res_y)
+ call __ecp_nistz256_sqr_montq ; res_y = S^2, also in r12..r15; rsi = poly[1], rbp = poly[3] on return
+ xor r9,r9 ; inline halving mod p of r12..r15: r9 takes the carry limb
+ mov rax,r12 ; rax,r10,rcx,r8 keep the unmodified limbs
+ add r12,-1 ; add p (poly[0] is all ones)
+ mov r10,r13
+ adc r13,rsi
+ mov rcx,r14
+ adc r14,0
+ mov r8,r15
+ adc r15,rbp
+ adc r9,0
+ xor rsi,rsi
+ test rax,1 ; was the value even?
+
+ cmovz r12,rax ; even: use the unmodified limbs and a zero carry limb
+ cmovz r13,r10
+ cmovz r14,rcx
+ cmovz r15,r8
+ cmovz r9,rsi
+
+ mov rax,r13 ; shift the five-limb value right by one bit and store it as res_y
+ shr r12,1
+ shl rax,63
+ mov r10,r14
+ shr r13,1
+ or r12,rax
+ shl r10,63
+ mov rcx,r15
+ shr r14,1
+ or r13,r10
+ shl rcx,63
+ mov QWORD[rdi],r12
+ shr r15,1
+ mov QWORD[8+rdi],r13
+ shl r9,63
+ or r14,rcx
+ or r15,r9
+ mov QWORD[16+rdi],r14
+ mov QWORD[24+rdi],r15
+ mov rax,QWORD[64+rsp]
+ lea rbx,[64+rsp]
+ mov r9,QWORD[((0+32))+rsp]
+ mov r10,QWORD[((8+32))+rsp]
+ lea rsi,[((0+32))+rsp]
+ mov r11,QWORD[((16+32))+rsp]
+ mov r12,QWORD[((24+32))+rsp]
+ lea rdi,[32+rsp]
+ call __ecp_nistz256_mul_montq ; M = M*Zsqr
+
+ lea rdi,[128+rsp]
+ call __ecp_nistz256_mul_by_2q ; tmp0 = 2*M (also left in registers)
+
+ lea rbx,[32+rsp]
+ lea rdi,[32+rsp]
+ call __ecp_nistz256_add_toq ; M = 2*M + M
+
+ mov rax,QWORD[96+rsp]
+ lea rbx,[96+rsp]
+ mov r9,QWORD[((0+0))+rsp]
+ mov r10,QWORD[((8+0))+rsp]
+ lea rsi,[((0+0))+rsp]
+ mov r11,QWORD[((16+0))+rsp]
+ mov r12,QWORD[((24+0))+rsp]
+ lea rdi,[rsp]
+ call __ecp_nistz256_mul_montq ; S = S*in_x
+
+ lea rdi,[128+rsp]
+ call __ecp_nistz256_mul_by_2q ; tmp0 = 2*S
+
+ mov rax,QWORD[((0+32))+rsp]
+ mov r14,QWORD[((8+32))+rsp]
+ lea rsi,[((0+32))+rsp]
+ mov r15,QWORD[((16+32))+rsp]
+ mov r8,QWORD[((24+32))+rsp]
+DB 102,72,15,126,199 ; movq rdi,xmm0  (-> res_x)
+ call __ecp_nistz256_sqr_montq ; res_x = M^2, result in r12,r13,r14,r15
+
+ lea rbx,[128+rsp]
+ mov r8,r14 ; move the result into r12,r13,r8,r9 as the sub helper expects
+ mov r9,r15
+ mov r14,rsi ; and put poly[1], poly[3] back into r14, r15
+ mov r15,rbp
+ call __ecp_nistz256_sub_fromq ; res_x = res_x - tmp0
+
+ mov rax,QWORD[((0+0))+rsp]
+ mov rbp,QWORD[((0+8))+rsp]
+ mov rcx,QWORD[((0+16))+rsp]
+ mov r10,QWORD[((0+24))+rsp]
+ lea rdi,[rsp]
+ call __ecp_nistz256_subq ; r12,r13,r8,r9 = S - res_x
+
+ mov rax,QWORD[32+rsp]
+ lea rbx,[32+rsp]
+ mov r14,r12
+ xor ecx,ecx ; sets ZF, so both cmovz below always move
+ mov QWORD[((0+0))+rsp],r12 ; S = result of the subtraction
+ mov r10,r13
+ mov QWORD[((0+8))+rsp],r13
+ cmovz r11,r8
+ mov QWORD[((0+16))+rsp],r8
+ lea rsi,[((0-0))+rsp]
+ cmovz r12,r9
+ mov QWORD[((0+24))+rsp],r9
+ mov r9,r14 ; r9,r10,r11,r12 = S, the register layout mul_montq expects
+ lea rdi,[rsp]
+ call __ecp_nistz256_mul_montq ; S = S*M
+
+DB 102,72,15,126,203 ; movq rbx,xmm1  (-> res_y)
+DB 102,72,15,126,207 ; movq rdi,xmm1  (-> res_y)
+ call __ecp_nistz256_sub_fromq ; res_y = S - res_y
+
+ add rsp,32*5+8
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbx
+ pop rbp
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_ecp_nistz256_point_double:
+global ecp_nistz256_point_add
+; ecp_nistz256_point_add (Win64: rcx -> res, rdx -> in1, r8 -> in2): adds two points, each three 32-byte values (x, y, z) at offsets 0, 32, 64.
+ALIGN 32
+ecp_nistz256_point_add:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_ecp_nistz256_point_add:
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+; Frame: 32*18+8 bytes as 32-byte slots named here H=0, Hsqr=32, R=64, Rsqr=96, Hcub=128, U1=160, U2=192, S1=224, S2=256, res_x=288, res_y=320, res_z=352,
+; in1_x=384, in1_y=416, in1_z=448, in2_x=480, in2_y=512, in2_z=544 (offsets from rsp). The DB lines are hand-encoded movq instructions.
+ push rbp
+ push rbx
+ push r12
+ push r13
+ push r14
+ push r15
+ sub rsp,32*18+8
+
+ movdqu xmm0,XMMWORD[rsi] ; copy in1 to the in1_x/in1_y/in1_z slots
+ movdqu xmm1,XMMWORD[16+rsi]
+ movdqu xmm2,XMMWORD[32+rsi]
+ movdqu xmm3,XMMWORD[48+rsi]
+ movdqu xmm4,XMMWORD[64+rsi]
+ movdqu xmm5,XMMWORD[80+rsi]
+ mov rbx,rsi ; rbx -> in1
+ mov rsi,rdx ; rsi -> in2
+ movdqa XMMWORD[384+rsp],xmm0
+ movdqa XMMWORD[(384+16)+rsp],xmm1
+ por xmm1,xmm0
+ movdqa XMMWORD[416+rsp],xmm2
+ movdqa XMMWORD[(416+16)+rsp],xmm3
+ por xmm3,xmm2
+ movdqa XMMWORD[448+rsp],xmm4
+ movdqa XMMWORD[(448+16)+rsp],xmm5
+ por xmm3,xmm1 ; xmm3 = OR of all of in1_x and in1_y
+
+ movdqu xmm0,XMMWORD[rsi] ; copy in2 to the in2_x/in2_y/in2_z slots
+ pshufd xmm5,xmm3,0xb1
+ movdqu xmm1,XMMWORD[16+rsi]
+ movdqu xmm2,XMMWORD[32+rsi]
+ por xmm5,xmm3
+ movdqu xmm3,XMMWORD[48+rsi]
+ mov rax,QWORD[((64+0))+rsi] ; rax,r14,r15,r8 = in2_z
+ mov r14,QWORD[((64+8))+rsi]
+ mov r15,QWORD[((64+16))+rsi]
+ mov r8,QWORD[((64+24))+rsi]
+ movdqa XMMWORD[480+rsp],xmm0
+ pshufd xmm4,xmm5,0x1e
+ movdqa XMMWORD[(480+16)+rsp],xmm1
+ por xmm1,xmm0
+DB 102,72,15,110,199 ; movq xmm0,rdi  (park the result pointer)
+ movdqa XMMWORD[512+rsp],xmm2
+ movdqa XMMWORD[(512+16)+rsp],xmm3
+ por xmm3,xmm2
+ por xmm5,xmm4 ; dword 0 of xmm5 = OR of every dword of in1_x|in1_y
+ pxor xmm4,xmm4
+ por xmm3,xmm1 ; xmm3 = OR of all of in2_x and in2_y
+
+ lea rsi,[((64-0))+rsi]
+ mov QWORD[((544+0))+rsp],rax
+ mov QWORD[((544+8))+rsp],r14
+ mov QWORD[((544+16))+rsp],r15
+ mov QWORD[((544+24))+rsp],r8
+ lea rdi,[96+rsp]
+ call __ecp_nistz256_sqr_montq ; [96] = in2_z^2 (the Rsqr slot is borrowed for it)
+
+ pcmpeqd xmm5,xmm4
+ pshufd xmm4,xmm3,0xb1
+ por xmm4,xmm3
+ pshufd xmm5,xmm5,0 ; xmm5 = all ones if in1_x and in1_y are entirely zero, else zero
+ pshufd xmm3,xmm4,0x1e
+ por xmm4,xmm3
+ pxor xmm3,xmm3
+ pcmpeqd xmm4,xmm3
+ pshufd xmm4,xmm4,0 ; xmm4 = the same mask for in2
+ mov rax,QWORD[((64+0))+rbx]
+ mov r14,QWORD[((64+8))+rbx]
+ mov r15,QWORD[((64+16))+rbx]
+ mov r8,QWORD[((64+24))+rbx]
+DB 102,72,15,110,203 ; movq xmm1,rbx  (park the in1 pointer)
+
+ lea rsi,[((64-0))+rbx]
+ lea rdi,[32+rsp]
+ call __ecp_nistz256_sqr_montq ; [32] = in1_z^2 (the Hsqr slot is borrowed for it)
+
+ mov rax,QWORD[544+rsp]
+ lea rbx,[544+rsp]
+ mov r9,QWORD[((0+96))+rsp]
+ mov r10,QWORD[((8+96))+rsp]
+ lea rsi,[((0+96))+rsp]
+ mov r11,QWORD[((16+96))+rsp]
+ mov r12,QWORD[((24+96))+rsp]
+ lea rdi,[224+rsp]
+ call __ecp_nistz256_mul_montq ; S1 = in2_z^2 * in2_z
+
+ mov rax,QWORD[448+rsp]
+ lea rbx,[448+rsp]
+ mov r9,QWORD[((0+32))+rsp]
+ mov r10,QWORD[((8+32))+rsp]
+ lea rsi,[((0+32))+rsp]
+ mov r11,QWORD[((16+32))+rsp]
+ mov r12,QWORD[((24+32))+rsp]
+ lea rdi,[256+rsp]
+ call __ecp_nistz256_mul_montq ; S2 = in1_z^2 * in1_z
+
+ mov rax,QWORD[416+rsp]
+ lea rbx,[416+rsp]
+ mov r9,QWORD[((0+224))+rsp]
+ mov r10,QWORD[((8+224))+rsp]
+ lea rsi,[((0+224))+rsp]
+ mov r11,QWORD[((16+224))+rsp]
+ mov r12,QWORD[((24+224))+rsp]
+ lea rdi,[224+rsp]
+ call __ecp_nistz256_mul_montq ; S1 = S1*in1_y
+
+ mov rax,QWORD[512+rsp]
+ lea rbx,[512+rsp]
+ mov r9,QWORD[((0+256))+rsp]
+ mov r10,QWORD[((8+256))+rsp]
+ lea rsi,[((0+256))+rsp]
+ mov r11,QWORD[((16+256))+rsp]
+ mov r12,QWORD[((24+256))+rsp]
+ lea rdi,[256+rsp]
+ call __ecp_nistz256_mul_montq ; S2 = S2*in2_y
+
+ lea rbx,[224+rsp]
+ lea rdi,[64+rsp]
+ call __ecp_nistz256_sub_fromq ; R = S2 - S1
+
+ or r12,r13 ; r12 = OR of the limbs of R (zero iff R == 0)
+ movdqa xmm2,xmm4
+ or r12,r8
+ or r12,r9
+ por xmm2,xmm5 ; xmm2 = in1 mask | in2 mask
+DB 102,73,15,110,220 ; movq xmm3,r12  (park the R-is-zero test value)
+
+ mov rax,QWORD[384+rsp]
+ lea rbx,[384+rsp]
+ mov r9,QWORD[((0+96))+rsp]
+ mov r10,QWORD[((8+96))+rsp]
+ lea rsi,[((0+96))+rsp]
+ mov r11,QWORD[((16+96))+rsp]
+ mov r12,QWORD[((24+96))+rsp]
+ lea rdi,[160+rsp]
+ call __ecp_nistz256_mul_montq ; U1 = in1_x * in2_z^2
+
+ mov rax,QWORD[480+rsp]
+ lea rbx,[480+rsp]
+ mov r9,QWORD[((0+32))+rsp]
+ mov r10,QWORD[((8+32))+rsp]
+ lea rsi,[((0+32))+rsp]
+ mov r11,QWORD[((16+32))+rsp]
+ mov r12,QWORD[((24+32))+rsp]
+ lea rdi,[192+rsp]
+ call __ecp_nistz256_mul_montq ; U2 = in2_x * in1_z^2
+
+ lea rbx,[160+rsp]
+ lea rdi,[rsp]
+ call __ecp_nistz256_sub_fromq ; H = U2 - U1
+
+ or r12,r13 ; ZF = 1 iff H == 0
+ or r12,r8
+ or r12,r9
+
+DB 0x3e ; prefix byte applied to the jnz below (branch-hint encoding)
+ jnz NEAR $L$add_proceedq ; H != 0: general addition
+DB 102,73,15,126,208 ; movq r8,xmm2  (combined in1/in2 mask)
+DB 102,73,15,126,217 ; movq r9,xmm3  (R-is-zero test value)
+ test r8,r8
+ jnz NEAR $L$add_proceedq ; one of the inputs is flagged by its mask: the selects at the end pick the other input
+ test r9,r9
+ jz NEAR $L$add_doubleq ; H == 0 and R == 0: hand over to the doubling code
+
+DB 102,72,15,126,199 ; movq rdi,xmm0  (result pointer)
+ pxor xmm0,xmm0 ; H == 0 but R != 0: write an all-zero 96-byte result
+ movdqu XMMWORD[rdi],xmm0
+ movdqu XMMWORD[16+rdi],xmm0
+ movdqu XMMWORD[32+rdi],xmm0
+ movdqu XMMWORD[48+rdi],xmm0
+ movdqu XMMWORD[64+rdi],xmm0
+ movdqu XMMWORD[80+rdi],xmm0
+ jmp NEAR $L$add_doneq
+
+ALIGN 32
+$L$add_doubleq:
+DB 102,72,15,126,206 ; movq rsi,xmm1  (in1 pointer)
+DB 102,72,15,126,199 ; movq rdi,xmm0  (result pointer)
+ add rsp,416 ; 32*18+8 - (32*5+8): shrink the frame to the size point_double uses
+ jmp NEAR $L$point_double_shortcutq
+
+ALIGN 32
+$L$add_proceedq:
+ mov rax,QWORD[((0+64))+rsp]
+ mov r14,QWORD[((8+64))+rsp]
+ lea rsi,[((0+64))+rsp]
+ mov r15,QWORD[((16+64))+rsp]
+ mov r8,QWORD[((24+64))+rsp]
+ lea rdi,[96+rsp]
+ call __ecp_nistz256_sqr_montq ; Rsqr = R^2
+
+ mov rax,QWORD[448+rsp]
+ lea rbx,[448+rsp]
+ mov r9,QWORD[((0+0))+rsp]
+ mov r10,QWORD[((8+0))+rsp]
+ lea rsi,[((0+0))+rsp]
+ mov r11,QWORD[((16+0))+rsp]
+ mov r12,QWORD[((24+0))+rsp]
+ lea rdi,[352+rsp]
+ call __ecp_nistz256_mul_montq ; res_z = H*in1_z
+
+ mov rax,QWORD[((0+0))+rsp]
+ mov r14,QWORD[((8+0))+rsp]
+ lea rsi,[((0+0))+rsp]
+ mov r15,QWORD[((16+0))+rsp]
+ mov r8,QWORD[((24+0))+rsp]
+ lea rdi,[32+rsp]
+ call __ecp_nistz256_sqr_montq ; Hsqr = H^2
+
+ mov rax,QWORD[544+rsp]
+ lea rbx,[544+rsp]
+ mov r9,QWORD[((0+352))+rsp]
+ mov r10,QWORD[((8+352))+rsp]
+ lea rsi,[((0+352))+rsp]
+ mov r11,QWORD[((16+352))+rsp]
+ mov r12,QWORD[((24+352))+rsp]
+ lea rdi,[352+rsp]
+ call __ecp_nistz256_mul_montq ; res_z = res_z*in2_z
+
+ mov rax,QWORD[rsp]
+ lea rbx,[rsp]
+ mov r9,QWORD[((0+32))+rsp]
+ mov r10,QWORD[((8+32))+rsp]
+ lea rsi,[((0+32))+rsp]
+ mov r11,QWORD[((16+32))+rsp]
+ mov r12,QWORD[((24+32))+rsp]
+ lea rdi,[128+rsp]
+ call __ecp_nistz256_mul_montq ; Hcub = Hsqr*H
+
+ mov rax,QWORD[160+rsp]
+ lea rbx,[160+rsp]
+ mov r9,QWORD[((0+32))+rsp]
+ mov r10,QWORD[((8+32))+rsp]
+ lea rsi,[((0+32))+rsp]
+ mov r11,QWORD[((16+32))+rsp]
+ mov r12,QWORD[((24+32))+rsp]
+ lea rdi,[192+rsp]
+ call __ecp_nistz256_mul_montq ; U2 = U1*Hsqr, also left in r12,r13,r8,r9 with r14/r15 = poly[1]/poly[3]
+
+; Double U2 in registers (nothing is stored). The carry out of the doubling is kept in r11 and the borrow of the
+; trial subtraction of p is folded into it, so p is subtracted exactly when the 257-bit doubled value is >= p.
+; The loads of Rsqr into rax,rbp,rcx,r10 are interleaved with the final selects.
+ xor r11,r11
+ add r12,r12
+ lea rsi,[96+rsp] ; rsi -> Rsqr
+ adc r13,r13
+ mov rax,r12
+ adc r8,r8
+ adc r9,r9
+ mov rbp,r13
+ adc r11,0 ; r11 = bit 256 of 2*U2
+
+ sub r12,-1
+ mov rcx,r8
+ sbb r13,r14
+ sbb r8,0
+ mov r10,r9
+ sbb r9,r15
+ sbb r11,0 ; CF = 1 only if the full 257-bit value was smaller than p
+
+ cmovc r12,rax
+ mov rax,QWORD[rsi]
+ cmovc r13,rbp
+ mov rbp,QWORD[8+rsi]
+ cmovc r8,rcx
+ mov rcx,QWORD[16+rsi]
+ cmovc r9,r10
+ mov r10,QWORD[24+rsi]
+
+ call __ecp_nistz256_subq ; registers = Rsqr - 2*U2
+
+ lea rbx,[128+rsp]
+ lea rdi,[288+rsp]
+ call __ecp_nistz256_sub_fromq ; res_x = (Rsqr - 2*U2) - Hcub
+
+ mov rax,QWORD[((192+0))+rsp]
+ mov rbp,QWORD[((192+8))+rsp]
+ mov rcx,QWORD[((192+16))+rsp]
+ mov r10,QWORD[((192+24))+rsp]
+ lea rdi,[320+rsp]
+
+ call __ecp_nistz256_subq ; registers = U2 - res_x
+
+ mov QWORD[rdi],r12 ; res_y = U2 - res_x
+ mov QWORD[8+rdi],r13
+ mov QWORD[16+rdi],r8
+ mov QWORD[24+rdi],r9
+ mov rax,QWORD[128+rsp]
+ lea rbx,[128+rsp]
+ mov r9,QWORD[((0+224))+rsp]
+ mov r10,QWORD[((8+224))+rsp]
+ lea rsi,[((0+224))+rsp]
+ mov r11,QWORD[((16+224))+rsp]
+ mov r12,QWORD[((24+224))+rsp]
+ lea rdi,[256+rsp]
+ call __ecp_nistz256_mul_montq ; S2 = S1*Hcub
+
+ mov rax,QWORD[320+rsp]
+ lea rbx,[320+rsp]
+ mov r9,QWORD[((0+64))+rsp]
+ mov r10,QWORD[((8+64))+rsp]
+ lea rsi,[((0+64))+rsp]
+ mov r11,QWORD[((16+64))+rsp]
+ mov r12,QWORD[((24+64))+rsp]
+ lea rdi,[320+rsp]
+ call __ecp_nistz256_mul_montq ; res_y = R*res_y
+
+ lea rbx,[256+rsp]
+ lea rdi,[320+rsp]
+ call __ecp_nistz256_sub_fromq ; res_y = res_y - S2
+
+DB 102,72,15,126,199 ; movq rdi,xmm0  (result pointer)
+; Branch-free selection for each coordinate: computed value, replaced by in2's coordinate under the in1 mask (xmm5), then by in1's under the in2 mask (xmm4).
+ movdqa xmm0,xmm5
+ movdqa xmm1,xmm5
+ pandn xmm0,XMMWORD[352+rsp]
+ movdqa xmm2,xmm5
+ pandn xmm1,XMMWORD[((352+16))+rsp]
+ movdqa xmm3,xmm5
+ pand xmm2,XMMWORD[544+rsp]
+ pand xmm3,XMMWORD[((544+16))+rsp]
+ por xmm2,xmm0
+ por xmm3,xmm1
+
+ movdqa xmm0,xmm4
+ movdqa xmm1,xmm4
+ pandn xmm0,xmm2
+ movdqa xmm2,xmm4
+ pandn xmm1,xmm3
+ movdqa xmm3,xmm4
+ pand xmm2,XMMWORD[448+rsp]
+ pand xmm3,XMMWORD[((448+16))+rsp]
+ por xmm2,xmm0
+ por xmm3,xmm1
+ movdqu XMMWORD[64+rdi],xmm2 ; z coordinate of the result
+ movdqu XMMWORD[80+rdi],xmm3
+
+ movdqa xmm0,xmm5
+ movdqa xmm1,xmm5
+ pandn xmm0,XMMWORD[288+rsp]
+ movdqa xmm2,xmm5
+ pandn xmm1,XMMWORD[((288+16))+rsp]
+ movdqa xmm3,xmm5
+ pand xmm2,XMMWORD[480+rsp]
+ pand xmm3,XMMWORD[((480+16))+rsp]
+ por xmm2,xmm0
+ por xmm3,xmm1
+
+ movdqa xmm0,xmm4
+ movdqa xmm1,xmm4
+ pandn xmm0,xmm2
+ movdqa xmm2,xmm4
+ pandn xmm1,xmm3
+ movdqa xmm3,xmm4
+ pand xmm2,XMMWORD[384+rsp]
+ pand xmm3,XMMWORD[((384+16))+rsp]
+ por xmm2,xmm0
+ por xmm3,xmm1
+ movdqu XMMWORD[rdi],xmm2 ; x coordinate of the result
+ movdqu XMMWORD[16+rdi],xmm3
+
+ movdqa xmm0,xmm5
+ movdqa xmm1,xmm5
+ pandn xmm0,XMMWORD[320+rsp]
+ movdqa xmm2,xmm5
+ pandn xmm1,XMMWORD[((320+16))+rsp]
+ movdqa xmm3,xmm5
+ pand xmm2,XMMWORD[512+rsp]
+ pand xmm3,XMMWORD[((512+16))+rsp]
+ por xmm2,xmm0
+ por xmm3,xmm1
+
+ movdqa xmm0,xmm4
+ movdqa xmm1,xmm4
+ pandn xmm0,xmm2
+ movdqa xmm2,xmm4
+ pandn xmm1,xmm3
+ movdqa xmm3,xmm4
+ pand xmm2,XMMWORD[416+rsp]
+ pand xmm3,XMMWORD[((416+16))+rsp]
+ por xmm2,xmm0
+ por xmm3,xmm1
+ movdqu XMMWORD[32+rdi],xmm2 ; y coordinate of the result
+ movdqu XMMWORD[48+rdi],xmm3
+
+$L$add_doneq:
+ add rsp,32*18+8
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbx
+ pop rbp
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_ecp_nistz256_point_add:
+global ecp_nistz256_point_add_affine
+; ecp_nistz256_point_add_affine (Win64: rcx -> res, rdx -> in1, r8 -> in2): in1 is three 32-byte values (x, y, z); only 64 bytes (x, y) are read from in2. res is three 32-byte values.
+ALIGN 32
+ecp_nistz256_point_add_affine:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_ecp_nistz256_point_add_affine:
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+; Frame: 32*15+8 bytes as 32-byte slots named here U2=0, S2=32, H=64, R=96, Hsqr=128, Hcub=160, Rsqr=192, res_x=224, res_y=256, res_z=288,
+; in1_x=320, in1_y=352, in1_z=384, in2_x=416, in2_y=448 (offsets from rsp). The DB lines are hand-encoded movq instructions.
+ push rbp
+ push rbx
+ push r12
+ push r13
+ push r14
+ push r15
+ sub rsp,32*15+8
+
+ movdqu xmm0,XMMWORD[rsi] ; copy in1 to the in1_x/in1_y/in1_z slots
+ mov rbx,rdx ; rbx -> in2
+ movdqu xmm1,XMMWORD[16+rsi]
+ movdqu xmm2,XMMWORD[32+rsi]
+ movdqu xmm3,XMMWORD[48+rsi]
+ movdqu xmm4,XMMWORD[64+rsi]
+ movdqu xmm5,XMMWORD[80+rsi]
+ mov rax,QWORD[((64+0))+rsi] ; rax,r14,r15,r8 = in1_z
+ mov r14,QWORD[((64+8))+rsi]
+ mov r15,QWORD[((64+16))+rsi]
+ mov r8,QWORD[((64+24))+rsi]
+ movdqa XMMWORD[320+rsp],xmm0
+ movdqa XMMWORD[(320+16)+rsp],xmm1
+ por xmm1,xmm0
+ movdqa XMMWORD[352+rsp],xmm2
+ movdqa XMMWORD[(352+16)+rsp],xmm3
+ por xmm3,xmm2
+ movdqa XMMWORD[384+rsp],xmm4
+ movdqa XMMWORD[(384+16)+rsp],xmm5
+ por xmm3,xmm1 ; xmm3 = OR of all of in1_x and in1_y
+
+ movdqu xmm0,XMMWORD[rbx] ; copy in2 to the in2_x/in2_y slots
+ pshufd xmm5,xmm3,0xb1
+ movdqu xmm1,XMMWORD[16+rbx]
+ movdqu xmm2,XMMWORD[32+rbx]
+ por xmm5,xmm3
+ movdqu xmm3,XMMWORD[48+rbx]
+ movdqa XMMWORD[416+rsp],xmm0
+ pshufd xmm4,xmm5,0x1e
+ movdqa XMMWORD[(416+16)+rsp],xmm1
+ por xmm1,xmm0
+DB 102,72,15,110,199 ; movq xmm0,rdi  (park the result pointer)
+ movdqa XMMWORD[448+rsp],xmm2
+ movdqa XMMWORD[(448+16)+rsp],xmm3
+ por xmm3,xmm2
+ por xmm5,xmm4 ; dword 0 of xmm5 = OR of every dword of in1_x|in1_y
+ pxor xmm4,xmm4
+ por xmm3,xmm1 ; xmm3 = OR of all of in2_x and in2_y
+
+ lea rsi,[((64-0))+rsi]
+ lea rdi,[32+rsp]
+ call __ecp_nistz256_sqr_montq ; [32] = in1_z^2, also in r12,r13,r14,r15
+
+ pcmpeqd xmm5,xmm4
+ pshufd xmm4,xmm3,0xb1
+ mov rax,QWORD[rbx] ; rax = in2_x[0]
+
+ mov r9,r12 ; r9,r10,r11,r12 = in1_z^2, the register layout mul_montq expects
+ por xmm4,xmm3
+ pshufd xmm5,xmm5,0 ; xmm5 = all ones if in1_x and in1_y are entirely zero, else zero
+ pshufd xmm3,xmm4,0x1e
+ mov r10,r13
+ por xmm4,xmm3
+ pxor xmm3,xmm3
+ mov r11,r14
+ pcmpeqd xmm4,xmm3
+ pshufd xmm4,xmm4,0 ; xmm4 = the same mask for in2
+
+ lea rsi,[((32-0))+rsp]
+ mov r12,r15
+ lea rdi,[rsp]
+ call __ecp_nistz256_mul_montq ; U2 = in1_z^2 * in2_x
+
+ lea rbx,[320+rsp]
+ lea rdi,[64+rsp]
+ call __ecp_nistz256_sub_fromq ; H = U2 - in1_x
+
+ mov rax,QWORD[384+rsp]
+ lea rbx,[384+rsp]
+ mov r9,QWORD[((0+32))+rsp]
+ mov r10,QWORD[((8+32))+rsp]
+ lea rsi,[((0+32))+rsp]
+ mov r11,QWORD[((16+32))+rsp]
+ mov r12,QWORD[((24+32))+rsp]
+ lea rdi,[32+rsp]
+ call __ecp_nistz256_mul_montq ; S2 = in1_z^2 * in1_z
+
+ mov rax,QWORD[384+rsp]
+ lea rbx,[384+rsp]
+ mov r9,QWORD[((0+64))+rsp]
+ mov r10,QWORD[((8+64))+rsp]
+ lea rsi,[((0+64))+rsp]
+ mov r11,QWORD[((16+64))+rsp]
+ mov r12,QWORD[((24+64))+rsp]
+ lea rdi,[288+rsp]
+ call __ecp_nistz256_mul_montq ; res_z = H*in1_z
+
+ mov rax,QWORD[448+rsp]
+ lea rbx,[448+rsp]
+ mov r9,QWORD[((0+32))+rsp]
+ mov r10,QWORD[((8+32))+rsp]
+ lea rsi,[((0+32))+rsp]
+ mov r11,QWORD[((16+32))+rsp]
+ mov r12,QWORD[((24+32))+rsp]
+ lea rdi,[32+rsp]
+ call __ecp_nistz256_mul_montq ; S2 = S2*in2_y
+
+ lea rbx,[352+rsp]
+ lea rdi,[96+rsp]
+ call __ecp_nistz256_sub_fromq ; R = S2 - in1_y
+
+ mov rax,QWORD[((0+64))+rsp]
+ mov r14,QWORD[((8+64))+rsp]
+ lea rsi,[((0+64))+rsp]
+ mov r15,QWORD[((16+64))+rsp]
+ mov r8,QWORD[((24+64))+rsp]
+ lea rdi,[128+rsp]
+ call __ecp_nistz256_sqr_montq ; Hsqr = H^2
+
+ mov rax,QWORD[((0+96))+rsp]
+ mov r14,QWORD[((8+96))+rsp]
+ lea rsi,[((0+96))+rsp]
+ mov r15,QWORD[((16+96))+rsp]
+ mov r8,QWORD[((24+96))+rsp]
+ lea rdi,[192+rsp]
+ call __ecp_nistz256_sqr_montq ; Rsqr = R^2
+
+ mov rax,QWORD[128+rsp]
+ lea rbx,[128+rsp]
+ mov r9,QWORD[((0+64))+rsp]
+ mov r10,QWORD[((8+64))+rsp]
+ lea rsi,[((0+64))+rsp]
+ mov r11,QWORD[((16+64))+rsp]
+ mov r12,QWORD[((24+64))+rsp]
+ lea rdi,[160+rsp]
+ call __ecp_nistz256_mul_montq ; Hcub = H*Hsqr
+
+ mov rax,QWORD[320+rsp]
+ lea rbx,[320+rsp]
+ mov r9,QWORD[((0+128))+rsp]
+ mov r10,QWORD[((8+128))+rsp]
+ lea rsi,[((0+128))+rsp]
+ mov r11,QWORD[((16+128))+rsp]
+ mov r12,QWORD[((24+128))+rsp]
+ lea rdi,[rsp]
+ call __ecp_nistz256_mul_montq ; U2 = Hsqr*in1_x, also left in r12,r13,r8,r9 with r14/r15 = poly[1]/poly[3]
+
+; Double U2 in registers (nothing is stored). The carry out of the doubling is kept in r11 and the borrow of the
+; trial subtraction of p is folded into it, so p is subtracted exactly when the 257-bit doubled value is >= p.
+; The loads of Rsqr into rax,rbp,rcx,r10 are interleaved with the final selects.
+ xor r11,r11
+ add r12,r12
+ lea rsi,[192+rsp] ; rsi -> Rsqr
+ adc r13,r13
+ mov rax,r12
+ adc r8,r8
+ adc r9,r9
+ mov rbp,r13
+ adc r11,0 ; r11 = bit 256 of 2*U2
+
+ sub r12,-1
+ mov rcx,r8
+ sbb r13,r14
+ sbb r8,0
+ mov r10,r9
+ sbb r9,r15
+ sbb r11,0 ; CF = 1 only if the full 257-bit value was smaller than p
+
+ cmovc r12,rax
+ mov rax,QWORD[rsi]
+ cmovc r13,rbp
+ mov rbp,QWORD[8+rsi]
+ cmovc r8,rcx
+ mov rcx,QWORD[16+rsi]
+ cmovc r9,r10
+ mov r10,QWORD[24+rsi]
+
+ call __ecp_nistz256_subq ; registers = Rsqr - 2*U2
+
+ lea rbx,[160+rsp]
+ lea rdi,[224+rsp]
+ call __ecp_nistz256_sub_fromq ; res_x = (Rsqr - 2*U2) - Hcub
+
+ mov rax,QWORD[((0+0))+rsp]
+ mov rbp,QWORD[((0+8))+rsp]
+ mov rcx,QWORD[((0+16))+rsp]
+ mov r10,QWORD[((0+24))+rsp]
+ lea rdi,[64+rsp]
+
+ call __ecp_nistz256_subq ; registers = U2 - res_x
+
+ mov QWORD[rdi],r12 ; H slot = U2 - res_x
+ mov QWORD[8+rdi],r13
+ mov QWORD[16+rdi],r8
+ mov QWORD[24+rdi],r9
+ mov rax,QWORD[352+rsp]
+ lea rbx,[352+rsp]
+ mov r9,QWORD[((0+160))+rsp]
+ mov r10,QWORD[((8+160))+rsp]
+ lea rsi,[((0+160))+rsp]
+ mov r11,QWORD[((16+160))+rsp]
+ mov r12,QWORD[((24+160))+rsp]
+ lea rdi,[32+rsp]
+ call __ecp_nistz256_mul_montq ; S2 = Hcub*in1_y
+
+ mov rax,QWORD[96+rsp]
+ lea rbx,[96+rsp]
+ mov r9,QWORD[((0+64))+rsp]
+ mov r10,QWORD[((8+64))+rsp]
+ lea rsi,[((0+64))+rsp]
+ mov r11,QWORD[((16+64))+rsp]
+ mov r12,QWORD[((24+64))+rsp]
+ lea rdi,[64+rsp]
+ call __ecp_nistz256_mul_montq ; H slot = H slot * R
+
+ lea rbx,[32+rsp]
+ lea rdi,[256+rsp]
+ call __ecp_nistz256_sub_fromq ; res_y = H slot - S2
+
+DB 102,72,15,126,199 ; movq rdi,xmm0  (result pointer)
+; Branch-free selection for each coordinate: computed value, replaced under the in1 mask (xmm5) by in2's coordinate ($L$ONE_mont for z), then under the in2 mask (xmm4) by in1's.
+ movdqa xmm0,xmm5
+ movdqa xmm1,xmm5
+ pandn xmm0,XMMWORD[288+rsp]
+ movdqa xmm2,xmm5
+ pandn xmm1,XMMWORD[((288+16))+rsp]
+ movdqa xmm3,xmm5
+ pand xmm2,XMMWORD[$L$ONE_mont]
+ pand xmm3,XMMWORD[(($L$ONE_mont+16))]
+ por xmm2,xmm0
+ por xmm3,xmm1
+
+ movdqa xmm0,xmm4
+ movdqa xmm1,xmm4
+ pandn xmm0,xmm2
+ movdqa xmm2,xmm4
+ pandn xmm1,xmm3
+ movdqa xmm3,xmm4
+ pand xmm2,XMMWORD[384+rsp]
+ pand xmm3,XMMWORD[((384+16))+rsp]
+ por xmm2,xmm0
+ por xmm3,xmm1
+ movdqu XMMWORD[64+rdi],xmm2 ; z coordinate of the result
+ movdqu XMMWORD[80+rdi],xmm3
+
+ movdqa xmm0,xmm5
+ movdqa xmm1,xmm5
+ pandn xmm0,XMMWORD[224+rsp]
+ movdqa xmm2,xmm5
+ pandn xmm1,XMMWORD[((224+16))+rsp]
+ movdqa xmm3,xmm5
+ pand xmm2,XMMWORD[416+rsp]
+ pand xmm3,XMMWORD[((416+16))+rsp]
+ por xmm2,xmm0
+ por xmm3,xmm1
+
+ movdqa xmm0,xmm4
+ movdqa xmm1,xmm4
+ pandn xmm0,xmm2
+ movdqa xmm2,xmm4
+ pandn xmm1,xmm3
+ movdqa xmm3,xmm4
+ pand xmm2,XMMWORD[320+rsp]
+ pand xmm3,XMMWORD[((320+16))+rsp]
+ por xmm2,xmm0
+ por xmm3,xmm1
+ movdqu XMMWORD[rdi],xmm2 ; x coordinate of the result
+ movdqu XMMWORD[16+rdi],xmm3
+
+ movdqa xmm0,xmm5
+ movdqa xmm1,xmm5
+ pandn xmm0,XMMWORD[256+rsp]
+ movdqa xmm2,xmm5
+ pandn xmm1,XMMWORD[((256+16))+rsp]
+ movdqa xmm3,xmm5
+ pand xmm2,XMMWORD[448+rsp]
+ pand xmm3,XMMWORD[((448+16))+rsp]
+ por xmm2,xmm0
+ por xmm3,xmm1
+
+ movdqa xmm0,xmm4
+ movdqa xmm1,xmm4
+ pandn xmm0,xmm2
+ movdqa xmm2,xmm4
+ pandn xmm1,xmm3
+ movdqa xmm3,xmm4
+ pand xmm2,XMMWORD[352+rsp]
+ pand xmm3,XMMWORD[((352+16))+rsp]
+ por xmm2,xmm0
+ por xmm3,xmm1
+ movdqu XMMWORD[32+rdi],xmm2 ; y coordinate of the result
+ movdqu XMMWORD[48+rdi],xmm3
+
+ add rsp,32*15+8
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbx
+ pop rbp
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_ecp_nistz256_point_add_affine:

Powered by Google App Engine
This is Rietveld 408576698