Index: third_party/boringssl/win-x86_64/crypto/ec/p256-x86_64-asm.asm |
diff --git a/third_party/boringssl/win-x86_64/crypto/ec/p256-x86_64-asm.asm b/third_party/boringssl/win-x86_64/crypto/ec/p256-x86_64-asm.asm |
new file mode 100644 |
index 0000000000000000000000000000000000000000..a2e40758194f84bc2b8df35fd20dc611155f170b |
--- /dev/null |
+++ b/third_party/boringssl/win-x86_64/crypto/ec/p256-x86_64-asm.asm |
@@ -0,0 +1,1925 @@ |
+default rel |
+%define XMMWORD |
+%define YMMWORD |
+%define ZMMWORD |
+section .text code align=64 |
+ |
+EXTERN OPENSSL_ia32cap_P |
+ |
+ |
+ALIGN 64 |
+$L$poly: |
+ DQ 0xffffffffffffffff,0x00000000ffffffff,0x0000000000000000,0xffffffff00000001 |
+ |
+$L$One: |
+ DD 1,1,1,1,1,1,1,1 |
+$L$Two: |
+ DD 2,2,2,2,2,2,2,2 |
+$L$Three: |
+ DD 3,3,3,3,3,3,3,3 |
+$L$ONE_mont: |
+ DQ 0x0000000000000001,0xffffffff00000000,0xffffffffffffffff,0x00000000fffffffe |
+ |
+ |
+ALIGN 64 |
+ecp_nistz256_mul_by_2: |
+ mov QWORD[8+rsp],rdi ;WIN64 prologue |
+ mov QWORD[16+rsp],rsi |
+ mov rax,rsp |
+$L$SEH_begin_ecp_nistz256_mul_by_2: |
+ mov rdi,rcx |
+ mov rsi,rdx |
+ |
+ |
+ push r12 |
+ push r13 |
+ |
+ mov r8,QWORD[rsi] |
+ mov r9,QWORD[8+rsi] |
+ add r8,r8 |
+ mov r10,QWORD[16+rsi] |
+ adc r9,r9 |
+ mov r11,QWORD[24+rsi] |
+ lea rsi,[$L$poly] |
+ mov rax,r8 |
+ adc r10,r10 |
+ adc r11,r11 |
+ mov rdx,r9 |
+ sbb r13,r13 |
+ |
+ sub r8,QWORD[rsi] |
+ mov rcx,r10 |
+ sbb r9,QWORD[8+rsi] |
+ sbb r10,QWORD[16+rsi] |
+ mov r12,r11 |
+ sbb r11,QWORD[24+rsi] |
+ test r13,r13 |
+ |
+ cmovz r8,rax |
+ cmovz r9,rdx |
+ mov QWORD[rdi],r8 |
+ cmovz r10,rcx |
+ mov QWORD[8+rdi],r9 |
+ cmovz r11,r12 |
+ mov QWORD[16+rdi],r10 |
+ mov QWORD[24+rdi],r11 |
+ |
+ pop r13 |
+ pop r12 |
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue |
+ mov rsi,QWORD[16+rsp] |
+ DB 0F3h,0C3h ;repret |
+$L$SEH_end_ecp_nistz256_mul_by_2: |
+ |
+ |
+ |
+global ecp_nistz256_neg |
+ |
+ALIGN 32 |
+ecp_nistz256_neg: |
+ mov QWORD[8+rsp],rdi ;WIN64 prologue |
+ mov QWORD[16+rsp],rsi |
+ mov rax,rsp |
+$L$SEH_begin_ecp_nistz256_neg: |
+ mov rdi,rcx |
+ mov rsi,rdx |
+ |
+ |
+ push r12 |
+ push r13 |
+ |
+ xor r8,r8 |
+ xor r9,r9 |
+ xor r10,r10 |
+ xor r11,r11 |
+ xor r13,r13 |
+ |
+ sub r8,QWORD[rsi] |
+ sbb r9,QWORD[8+rsi] |
+ sbb r10,QWORD[16+rsi] |
+ mov rax,r8 |
+ sbb r11,QWORD[24+rsi] |
+ lea rsi,[$L$poly] |
+ mov rdx,r9 |
+ sbb r13,0 |
+ |
+ add r8,QWORD[rsi] |
+ mov rcx,r10 |
+ adc r9,QWORD[8+rsi] |
+ adc r10,QWORD[16+rsi] |
+ mov r12,r11 |
+ adc r11,QWORD[24+rsi] |
+ test r13,r13 |
+ |
+ cmovz r8,rax |
+ cmovz r9,rdx |
+ mov QWORD[rdi],r8 |
+ cmovz r10,rcx |
+ mov QWORD[8+rdi],r9 |
+ cmovz r11,r12 |
+ mov QWORD[16+rdi],r10 |
+ mov QWORD[24+rdi],r11 |
+ |
+ pop r13 |
+ pop r12 |
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue |
+ mov rsi,QWORD[16+rsp] |
+ DB 0F3h,0C3h ;repret |
+$L$SEH_end_ecp_nistz256_neg: |
+ |
+ |
+ |
+ |
+ |
+ |
+global ecp_nistz256_mul_mont |
+ |
+ALIGN 32 |
+ecp_nistz256_mul_mont: |
+ mov QWORD[8+rsp],rdi ;WIN64 prologue |
+ mov QWORD[16+rsp],rsi |
+ mov rax,rsp |
+$L$SEH_begin_ecp_nistz256_mul_mont: |
+ mov rdi,rcx |
+ mov rsi,rdx |
+ mov rdx,r8 |
+ |
+ |
+$L$mul_mont: |
+ push rbp |
+ push rbx |
+ push r12 |
+ push r13 |
+ push r14 |
+ push r15 |
+ mov rbx,rdx |
+ mov rax,QWORD[rdx] |
+ mov r9,QWORD[rsi] |
+ mov r10,QWORD[8+rsi] |
+ mov r11,QWORD[16+rsi] |
+ mov r12,QWORD[24+rsi] |
+ |
+ call __ecp_nistz256_mul_montq |
+$L$mul_mont_done: |
+ pop r15 |
+ pop r14 |
+ pop r13 |
+ pop r12 |
+ pop rbx |
+ pop rbp |
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue |
+ mov rsi,QWORD[16+rsp] |
+ DB 0F3h,0C3h ;repret |
+$L$SEH_end_ecp_nistz256_mul_mont: |
+ |
+ |
+ALIGN 32 |
+__ecp_nistz256_mul_montq: |
+ |
+ |
+ mov rbp,rax |
+ mul r9 |
+ mov r14,QWORD[(($L$poly+8))] |
+ mov r8,rax |
+ mov rax,rbp |
+ mov r9,rdx |
+ |
+ mul r10 |
+ mov r15,QWORD[(($L$poly+24))] |
+ add r9,rax |
+ mov rax,rbp |
+ adc rdx,0 |
+ mov r10,rdx |
+ |
+ mul r11 |
+ add r10,rax |
+ mov rax,rbp |
+ adc rdx,0 |
+ mov r11,rdx |
+ |
+ mul r12 |
+ add r11,rax |
+ mov rax,r8 |
+ adc rdx,0 |
+ xor r13,r13 |
+ mov r12,rdx |
+ |
+ |
+ |
+ |
+ |
+ |
+ |
+ |
+ |
+ |
+ mov rbp,r8 |
+ shl r8,32 |
+ mul r15 |
+ shr rbp,32 |
+ add r9,r8 |
+ adc r10,rbp |
+ adc r11,rax |
+ mov rax,QWORD[8+rbx] |
+ adc r12,rdx |
+ adc r13,0 |
+ xor r8,r8 |
+ |
+ |
+ |
+ mov rbp,rax |
+ mul QWORD[rsi] |
+ add r9,rax |
+ mov rax,rbp |
+ adc rdx,0 |
+ mov rcx,rdx |
+ |
+ mul QWORD[8+rsi] |
+ add r10,rcx |
+ adc rdx,0 |
+ add r10,rax |
+ mov rax,rbp |
+ adc rdx,0 |
+ mov rcx,rdx |
+ |
+ mul QWORD[16+rsi] |
+ add r11,rcx |
+ adc rdx,0 |
+ add r11,rax |
+ mov rax,rbp |
+ adc rdx,0 |
+ mov rcx,rdx |
+ |
+ mul QWORD[24+rsi] |
+ add r12,rcx |
+ adc rdx,0 |
+ add r12,rax |
+ mov rax,r9 |
+ adc r13,rdx |
+ adc r8,0 |
+ |
+ |
+ |
+ mov rbp,r9 |
+ shl r9,32 |
+ mul r15 |
+ shr rbp,32 |
+ add r10,r9 |
+ adc r11,rbp |
+ adc r12,rax |
+ mov rax,QWORD[16+rbx] |
+ adc r13,rdx |
+ adc r8,0 |
+ xor r9,r9 |
+ |
+ |
+ |
+ mov rbp,rax |
+ mul QWORD[rsi] |
+ add r10,rax |
+ mov rax,rbp |
+ adc rdx,0 |
+ mov rcx,rdx |
+ |
+ mul QWORD[8+rsi] |
+ add r11,rcx |
+ adc rdx,0 |
+ add r11,rax |
+ mov rax,rbp |
+ adc rdx,0 |
+ mov rcx,rdx |
+ |
+ mul QWORD[16+rsi] |
+ add r12,rcx |
+ adc rdx,0 |
+ add r12,rax |
+ mov rax,rbp |
+ adc rdx,0 |
+ mov rcx,rdx |
+ |
+ mul QWORD[24+rsi] |
+ add r13,rcx |
+ adc rdx,0 |
+ add r13,rax |
+ mov rax,r10 |
+ adc r8,rdx |
+ adc r9,0 |
+ |
+ |
+ |
+ mov rbp,r10 |
+ shl r10,32 |
+ mul r15 |
+ shr rbp,32 |
+ add r11,r10 |
+ adc r12,rbp |
+ adc r13,rax |
+ mov rax,QWORD[24+rbx] |
+ adc r8,rdx |
+ adc r9,0 |
+ xor r10,r10 |
+ |
+ |
+ |
+ mov rbp,rax |
+ mul QWORD[rsi] |
+ add r11,rax |
+ mov rax,rbp |
+ adc rdx,0 |
+ mov rcx,rdx |
+ |
+ mul QWORD[8+rsi] |
+ add r12,rcx |
+ adc rdx,0 |
+ add r12,rax |
+ mov rax,rbp |
+ adc rdx,0 |
+ mov rcx,rdx |
+ |
+ mul QWORD[16+rsi] |
+ add r13,rcx |
+ adc rdx,0 |
+ add r13,rax |
+ mov rax,rbp |
+ adc rdx,0 |
+ mov rcx,rdx |
+ |
+ mul QWORD[24+rsi] |
+ add r8,rcx |
+ adc rdx,0 |
+ add r8,rax |
+ mov rax,r11 |
+ adc r9,rdx |
+ adc r10,0 |
+ |
+ |
+ |
+ mov rbp,r11 |
+ shl r11,32 |
+ mul r15 |
+ shr rbp,32 |
+ add r12,r11 |
+ adc r13,rbp |
+ mov rcx,r12 |
+ adc r8,rax |
+ adc r9,rdx |
+ mov rbp,r13 |
+ adc r10,0 |
+ |
+ |
+ |
+ sub r12,-1 |
+ mov rbx,r8 |
+ sbb r13,r14 |
+ sbb r8,0 |
+ mov rdx,r9 |
+ sbb r9,r15 |
+ sbb r10,0 |
+ |
+ cmovc r12,rcx |
+ cmovc r13,rbp |
+ mov QWORD[rdi],r12 |
+ cmovc r8,rbx |
+ mov QWORD[8+rdi],r13 |
+ cmovc r9,rdx |
+ mov QWORD[16+rdi],r8 |
+ mov QWORD[24+rdi],r9 |
+ |
+ DB 0F3h,0C3h ;repret |
+ |
+ |
+ |
+ |
+ |
+ |
+ |
+ |
+ |
+global ecp_nistz256_sqr_mont |
+ |
+ALIGN 32 |
+ecp_nistz256_sqr_mont: |
+ mov QWORD[8+rsp],rdi ;WIN64 prologue |
+ mov QWORD[16+rsp],rsi |
+ mov rax,rsp |
+$L$SEH_begin_ecp_nistz256_sqr_mont: |
+ mov rdi,rcx |
+ mov rsi,rdx |
+ |
+ |
+ push rbp |
+ push rbx |
+ push r12 |
+ push r13 |
+ push r14 |
+ push r15 |
+ mov rax,QWORD[rsi] |
+ mov r14,QWORD[8+rsi] |
+ mov r15,QWORD[16+rsi] |
+ mov r8,QWORD[24+rsi] |
+ |
+ call __ecp_nistz256_sqr_montq |
+$L$sqr_mont_done: |
+ pop r15 |
+ pop r14 |
+ pop r13 |
+ pop r12 |
+ pop rbx |
+ pop rbp |
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue |
+ mov rsi,QWORD[16+rsp] |
+ DB 0F3h,0C3h ;repret |
+$L$SEH_end_ecp_nistz256_sqr_mont: |
+ |
+ |
+ALIGN 32 |
+__ecp_nistz256_sqr_montq: |
+ mov r13,rax |
+ mul r14 |
+ mov r9,rax |
+ mov rax,r15 |
+ mov r10,rdx |
+ |
+ mul r13 |
+ add r10,rax |
+ mov rax,r8 |
+ adc rdx,0 |
+ mov r11,rdx |
+ |
+ mul r13 |
+ add r11,rax |
+ mov rax,r15 |
+ adc rdx,0 |
+ mov r12,rdx |
+ |
+ |
+ mul r14 |
+ add r11,rax |
+ mov rax,r8 |
+ adc rdx,0 |
+ mov rbp,rdx |
+ |
+ mul r14 |
+ add r12,rax |
+ mov rax,r8 |
+ adc rdx,0 |
+ add r12,rbp |
+ mov r13,rdx |
+ adc r13,0 |
+ |
+ |
+ mul r15 |
+ xor r15,r15 |
+ add r13,rax |
+ mov rax,QWORD[rsi] |
+ mov r14,rdx |
+ adc r14,0 |
+ |
+ add r9,r9 |
+ adc r10,r10 |
+ adc r11,r11 |
+ adc r12,r12 |
+ adc r13,r13 |
+ adc r14,r14 |
+ adc r15,0 |
+ |
+ mul rax |
+ mov r8,rax |
+ mov rax,QWORD[8+rsi] |
+ mov rcx,rdx |
+ |
+ mul rax |
+ add r9,rcx |
+ adc r10,rax |
+ mov rax,QWORD[16+rsi] |
+ adc rdx,0 |
+ mov rcx,rdx |
+ |
+ mul rax |
+ add r11,rcx |
+ adc r12,rax |
+ mov rax,QWORD[24+rsi] |
+ adc rdx,0 |
+ mov rcx,rdx |
+ |
+ mul rax |
+ add r13,rcx |
+ adc r14,rax |
+ mov rax,r8 |
+ adc r15,rdx |
+ |
+ mov rsi,QWORD[(($L$poly+8))] |
+ mov rbp,QWORD[(($L$poly+24))] |
+ |
+ |
+ |
+ |
+ mov rcx,r8 |
+ shl r8,32 |
+ mul rbp |
+ shr rcx,32 |
+ add r9,r8 |
+ adc r10,rcx |
+ adc r11,rax |
+ mov rax,r9 |
+ adc rdx,0 |
+ |
+ |
+ |
+ mov rcx,r9 |
+ shl r9,32 |
+ mov r8,rdx |
+ mul rbp |
+ shr rcx,32 |
+ add r10,r9 |
+ adc r11,rcx |
+ adc r8,rax |
+ mov rax,r10 |
+ adc rdx,0 |
+ |
+ |
+ |
+ mov rcx,r10 |
+ shl r10,32 |
+ mov r9,rdx |
+ mul rbp |
+ shr rcx,32 |
+ add r11,r10 |
+ adc r8,rcx |
+ adc r9,rax |
+ mov rax,r11 |
+ adc rdx,0 |
+ |
+ |
+ |
+ mov rcx,r11 |
+ shl r11,32 |
+ mov r10,rdx |
+ mul rbp |
+ shr rcx,32 |
+ add r8,r11 |
+ adc r9,rcx |
+ adc r10,rax |
+ adc rdx,0 |
+ xor r11,r11 |
+ |
+ |
+ |
+ add r12,r8 |
+ adc r13,r9 |
+ mov r8,r12 |
+ adc r14,r10 |
+ adc r15,rdx |
+ mov r9,r13 |
+ adc r11,0 |
+ |
+ sub r12,-1 |
+ mov r10,r14 |
+ sbb r13,rsi |
+ sbb r14,0 |
+ mov rcx,r15 |
+ sbb r15,rbp |
+ sbb r11,0 |
+ |
+ cmovc r12,r8 |
+ cmovc r13,r9 |
+ mov QWORD[rdi],r12 |
+ cmovc r14,r10 |
+ mov QWORD[8+rdi],r13 |
+ cmovc r15,rcx |
+ mov QWORD[16+rdi],r14 |
+ mov QWORD[24+rdi],r15 |
+ |
+ DB 0F3h,0C3h ;repret |
+ |
+ |
+ |
+ |
+ |
+ |
+ |
+global ecp_nistz256_from_mont |
+ |
+ALIGN 32 |
+ecp_nistz256_from_mont: |
+ mov QWORD[8+rsp],rdi ;WIN64 prologue |
+ mov QWORD[16+rsp],rsi |
+ mov rax,rsp |
+$L$SEH_begin_ecp_nistz256_from_mont: |
+ mov rdi,rcx |
+ mov rsi,rdx |
+ |
+ |
+ push r12 |
+ push r13 |
+ |
+ mov rax,QWORD[rsi] |
+ mov r13,QWORD[(($L$poly+24))] |
+ mov r9,QWORD[8+rsi] |
+ mov r10,QWORD[16+rsi] |
+ mov r11,QWORD[24+rsi] |
+ mov r8,rax |
+ mov r12,QWORD[(($L$poly+8))] |
+ |
+ |
+ |
+ mov rcx,rax |
+ shl r8,32 |
+ mul r13 |
+ shr rcx,32 |
+ add r9,r8 |
+ adc r10,rcx |
+ adc r11,rax |
+ mov rax,r9 |
+ adc rdx,0 |
+ |
+ |
+ |
+ mov rcx,r9 |
+ shl r9,32 |
+ mov r8,rdx |
+ mul r13 |
+ shr rcx,32 |
+ add r10,r9 |
+ adc r11,rcx |
+ adc r8,rax |
+ mov rax,r10 |
+ adc rdx,0 |
+ |
+ |
+ |
+ mov rcx,r10 |
+ shl r10,32 |
+ mov r9,rdx |
+ mul r13 |
+ shr rcx,32 |
+ add r11,r10 |
+ adc r8,rcx |
+ adc r9,rax |
+ mov rax,r11 |
+ adc rdx,0 |
+ |
+ |
+ |
+ mov rcx,r11 |
+ shl r11,32 |
+ mov r10,rdx |
+ mul r13 |
+ shr rcx,32 |
+ add r8,r11 |
+ adc r9,rcx |
+ mov rcx,r8 |
+ adc r10,rax |
+ mov rsi,r9 |
+ adc rdx,0 |
+ |
+ sub r8,-1 |
+ mov rax,r10 |
+ sbb r9,r12 |
+ sbb r10,0 |
+ mov r11,rdx |
+ sbb rdx,r13 |
+ sbb r13,r13 |
+ |
+ cmovnz r8,rcx |
+ cmovnz r9,rsi |
+ mov QWORD[rdi],r8 |
+ cmovnz r10,rax |
+ mov QWORD[8+rdi],r9 |
+ cmovz r11,rdx |
+ mov QWORD[16+rdi],r10 |
+ mov QWORD[24+rdi],r11 |
+ |
+ pop r13 |
+ pop r12 |
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue |
+ mov rsi,QWORD[16+rsp] |
+ DB 0F3h,0C3h ;repret |
+$L$SEH_end_ecp_nistz256_from_mont: |
+ |
+ |
+global ecp_nistz256_select_w5 |
+ |
+ALIGN 32 |
+ecp_nistz256_select_w5: |
+ lea rax,[((-136))+rsp] |
+$L$SEH_begin_ecp_nistz256_select_w5: |
+DB 0x48,0x8d,0x60,0xe0 |
+DB 0x0f,0x29,0x70,0xe0 |
+DB 0x0f,0x29,0x78,0xf0 |
+DB 0x44,0x0f,0x29,0x00 |
+DB 0x44,0x0f,0x29,0x48,0x10 |
+DB 0x44,0x0f,0x29,0x50,0x20 |
+DB 0x44,0x0f,0x29,0x58,0x30 |
+DB 0x44,0x0f,0x29,0x60,0x40 |
+DB 0x44,0x0f,0x29,0x68,0x50 |
+DB 0x44,0x0f,0x29,0x70,0x60 |
+DB 0x44,0x0f,0x29,0x78,0x70 |
+ movdqa xmm0,XMMWORD[$L$One] |
+ movd xmm1,r8d |
+ |
+ pxor xmm2,xmm2 |
+ pxor xmm3,xmm3 |
+ pxor xmm4,xmm4 |
+ pxor xmm5,xmm5 |
+ pxor xmm6,xmm6 |
+ pxor xmm7,xmm7 |
+ |
+ movdqa xmm8,xmm0 |
+ pshufd xmm1,xmm1,0 |
+ |
+ mov rax,16 |
+$L$select_loop_sse_w5: |
+ |
+ movdqa xmm15,xmm8 |
+ paddd xmm8,xmm0 |
+ pcmpeqd xmm15,xmm1 |
+ |
+ movdqa xmm9,XMMWORD[rdx] |
+ movdqa xmm10,XMMWORD[16+rdx] |
+ movdqa xmm11,XMMWORD[32+rdx] |
+ movdqa xmm12,XMMWORD[48+rdx] |
+ movdqa xmm13,XMMWORD[64+rdx] |
+ movdqa xmm14,XMMWORD[80+rdx] |
+ lea rdx,[96+rdx] |
+ |
+ pand xmm9,xmm15 |
+ pand xmm10,xmm15 |
+ por xmm2,xmm9 |
+ pand xmm11,xmm15 |
+ por xmm3,xmm10 |
+ pand xmm12,xmm15 |
+ por xmm4,xmm11 |
+ pand xmm13,xmm15 |
+ por xmm5,xmm12 |
+ pand xmm14,xmm15 |
+ por xmm6,xmm13 |
+ por xmm7,xmm14 |
+ |
+ dec rax |
+ jnz NEAR $L$select_loop_sse_w5 |
+ |
+ movdqu XMMWORD[rcx],xmm2 |
+ movdqu XMMWORD[16+rcx],xmm3 |
+ movdqu XMMWORD[32+rcx],xmm4 |
+ movdqu XMMWORD[48+rcx],xmm5 |
+ movdqu XMMWORD[64+rcx],xmm6 |
+ movdqu XMMWORD[80+rcx],xmm7 |
+ movaps xmm6,XMMWORD[rsp] |
+ movaps xmm7,XMMWORD[16+rsp] |
+ movaps xmm8,XMMWORD[32+rsp] |
+ movaps xmm9,XMMWORD[48+rsp] |
+ movaps xmm10,XMMWORD[64+rsp] |
+ movaps xmm11,XMMWORD[80+rsp] |
+ movaps xmm12,XMMWORD[96+rsp] |
+ movaps xmm13,XMMWORD[112+rsp] |
+ movaps xmm14,XMMWORD[128+rsp] |
+ movaps xmm15,XMMWORD[144+rsp] |
+ lea rsp,[168+rsp] |
+$L$SEH_end_ecp_nistz256_select_w5: |
+ DB 0F3h,0C3h ;repret |
+ |
+ |
+ |
+ |
+global ecp_nistz256_select_w7 |
+ |
+ALIGN 32 |
+ecp_nistz256_select_w7: |
+ lea rax,[((-136))+rsp] |
+$L$SEH_begin_ecp_nistz256_select_w7: |
+DB 0x48,0x8d,0x60,0xe0 |
+DB 0x0f,0x29,0x70,0xe0 |
+DB 0x0f,0x29,0x78,0xf0 |
+DB 0x44,0x0f,0x29,0x00 |
+DB 0x44,0x0f,0x29,0x48,0x10 |
+DB 0x44,0x0f,0x29,0x50,0x20 |
+DB 0x44,0x0f,0x29,0x58,0x30 |
+DB 0x44,0x0f,0x29,0x60,0x40 |
+DB 0x44,0x0f,0x29,0x68,0x50 |
+DB 0x44,0x0f,0x29,0x70,0x60 |
+DB 0x44,0x0f,0x29,0x78,0x70 |
+ movdqa xmm8,XMMWORD[$L$One] |
+ movd xmm1,r8d |
+ |
+ pxor xmm2,xmm2 |
+ pxor xmm3,xmm3 |
+ pxor xmm4,xmm4 |
+ pxor xmm5,xmm5 |
+ |
+ movdqa xmm0,xmm8 |
+ pshufd xmm1,xmm1,0 |
+ mov rax,64 |
+ |
+$L$select_loop_sse_w7: |
+ movdqa xmm15,xmm8 |
+ paddd xmm8,xmm0 |
+ movdqa xmm9,XMMWORD[rdx] |
+ movdqa xmm10,XMMWORD[16+rdx] |
+ pcmpeqd xmm15,xmm1 |
+ movdqa xmm11,XMMWORD[32+rdx] |
+ movdqa xmm12,XMMWORD[48+rdx] |
+ lea rdx,[64+rdx] |
+ |
+ pand xmm9,xmm15 |
+ pand xmm10,xmm15 |
+ por xmm2,xmm9 |
+ pand xmm11,xmm15 |
+ por xmm3,xmm10 |
+ pand xmm12,xmm15 |
+ por xmm4,xmm11 |
+ prefetcht0 [255+rdx] |
+ por xmm5,xmm12 |
+ |
+ dec rax |
+ jnz NEAR $L$select_loop_sse_w7 |
+ |
+ movdqu XMMWORD[rcx],xmm2 |
+ movdqu XMMWORD[16+rcx],xmm3 |
+ movdqu XMMWORD[32+rcx],xmm4 |
+ movdqu XMMWORD[48+rcx],xmm5 |
+ movaps xmm6,XMMWORD[rsp] |
+ movaps xmm7,XMMWORD[16+rsp] |
+ movaps xmm8,XMMWORD[32+rsp] |
+ movaps xmm9,XMMWORD[48+rsp] |
+ movaps xmm10,XMMWORD[64+rsp] |
+ movaps xmm11,XMMWORD[80+rsp] |
+ movaps xmm12,XMMWORD[96+rsp] |
+ movaps xmm13,XMMWORD[112+rsp] |
+ movaps xmm14,XMMWORD[128+rsp] |
+ movaps xmm15,XMMWORD[144+rsp] |
+ lea rsp,[168+rsp] |
+$L$SEH_end_ecp_nistz256_select_w7: |
+ DB 0F3h,0C3h ;repret |
+ |
+global ecp_nistz256_avx2_select_w7 |
+ |
+ALIGN 32 |
+ecp_nistz256_avx2_select_w7: |
+ mov QWORD[8+rsp],rdi ;WIN64 prologue |
+ mov QWORD[16+rsp],rsi |
+ mov rax,rsp |
+$L$SEH_begin_ecp_nistz256_avx2_select_w7: |
+ mov rdi,rcx |
+ mov rsi,rdx |
+ mov rdx,r8 |
+ |
+ |
+DB 0x0f,0x0b |
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue |
+ mov rsi,QWORD[16+rsp] |
+ DB 0F3h,0C3h ;repret |
+$L$SEH_end_ecp_nistz256_avx2_select_w7: |
+ |
+ALIGN 32 |
+__ecp_nistz256_add_toq: |
+ add r12,QWORD[rbx] |
+ adc r13,QWORD[8+rbx] |
+ mov rax,r12 |
+ adc r8,QWORD[16+rbx] |
+ adc r9,QWORD[24+rbx] |
+ mov rbp,r13 |
+ sbb r11,r11 |
+ |
+ sub r12,-1 |
+ mov rcx,r8 |
+ sbb r13,r14 |
+ sbb r8,0 |
+ mov r10,r9 |
+ sbb r9,r15 |
+ test r11,r11 |
+ |
+ cmovz r12,rax |
+ cmovz r13,rbp |
+ mov QWORD[rdi],r12 |
+ cmovz r8,rcx |
+ mov QWORD[8+rdi],r13 |
+ cmovz r9,r10 |
+ mov QWORD[16+rdi],r8 |
+ mov QWORD[24+rdi],r9 |
+ |
+ DB 0F3h,0C3h ;repret |
+ |
+ |
+ |
+ALIGN 32 |
+__ecp_nistz256_sub_fromq: |
+ sub r12,QWORD[rbx] |
+ sbb r13,QWORD[8+rbx] |
+ mov rax,r12 |
+ sbb r8,QWORD[16+rbx] |
+ sbb r9,QWORD[24+rbx] |
+ mov rbp,r13 |
+ sbb r11,r11 |
+ |
+ add r12,-1 |
+ mov rcx,r8 |
+ adc r13,r14 |
+ adc r8,0 |
+ mov r10,r9 |
+ adc r9,r15 |
+ test r11,r11 |
+ |
+ cmovz r12,rax |
+ cmovz r13,rbp |
+ mov QWORD[rdi],r12 |
+ cmovz r8,rcx |
+ mov QWORD[8+rdi],r13 |
+ cmovz r9,r10 |
+ mov QWORD[16+rdi],r8 |
+ mov QWORD[24+rdi],r9 |
+ |
+ DB 0F3h,0C3h ;repret |
+ |
+ |
+ |
+ALIGN 32 |
+__ecp_nistz256_subq: |
+ sub rax,r12 |
+ sbb rbp,r13 |
+ mov r12,rax |
+ sbb rcx,r8 |
+ sbb r10,r9 |
+ mov r13,rbp |
+ sbb r11,r11 |
+ |
+ add rax,-1 |
+ mov r8,rcx |
+ adc rbp,r14 |
+ adc rcx,0 |
+ mov r9,r10 |
+ adc r10,r15 |
+ test r11,r11 |
+ |
+ cmovnz r12,rax |
+ cmovnz r13,rbp |
+ cmovnz r8,rcx |
+ cmovnz r9,r10 |
+ |
+ DB 0F3h,0C3h ;repret |
+ |
+ |
+ |
+ALIGN 32 |
+__ecp_nistz256_mul_by_2q: |
+ add r12,r12 |
+ adc r13,r13 |
+ mov rax,r12 |
+ adc r8,r8 |
+ adc r9,r9 |
+ mov rbp,r13 |
+ sbb r11,r11 |
+ |
+ sub r12,-1 |
+ mov rcx,r8 |
+ sbb r13,r14 |
+ sbb r8,0 |
+ mov r10,r9 |
+ sbb r9,r15 |
+ test r11,r11 |
+ |
+ cmovz r12,rax |
+ cmovz r13,rbp |
+ mov QWORD[rdi],r12 |
+ cmovz r8,rcx |
+ mov QWORD[8+rdi],r13 |
+ cmovz r9,r10 |
+ mov QWORD[16+rdi],r8 |
+ mov QWORD[24+rdi],r9 |
+ |
+ DB 0F3h,0C3h ;repret |
+ |
+global ecp_nistz256_point_double |
+ |
+ALIGN 32 |
+ecp_nistz256_point_double: |
+ mov QWORD[8+rsp],rdi ;WIN64 prologue |
+ mov QWORD[16+rsp],rsi |
+ mov rax,rsp |
+$L$SEH_begin_ecp_nistz256_point_double: |
+ mov rdi,rcx |
+ mov rsi,rdx |
+ |
+ |
+ push rbp |
+ push rbx |
+ push r12 |
+ push r13 |
+ push r14 |
+ push r15 |
+ sub rsp,32*5+8 |
+ |
+$L$point_double_shortcutq: |
+ movdqu xmm0,XMMWORD[rsi] |
+ mov rbx,rsi |
+ movdqu xmm1,XMMWORD[16+rsi] |
+ mov r12,QWORD[((32+0))+rsi] |
+ mov r13,QWORD[((32+8))+rsi] |
+ mov r8,QWORD[((32+16))+rsi] |
+ mov r9,QWORD[((32+24))+rsi] |
+ mov r14,QWORD[(($L$poly+8))] |
+ mov r15,QWORD[(($L$poly+24))] |
+ movdqa XMMWORD[96+rsp],xmm0 |
+ movdqa XMMWORD[(96+16)+rsp],xmm1 |
+ lea r10,[32+rdi] |
+ lea r11,[64+rdi] |
+DB 102,72,15,110,199 |
+DB 102,73,15,110,202 |
+DB 102,73,15,110,211 |
+ |
+ lea rdi,[rsp] |
+ call __ecp_nistz256_mul_by_2q |
+ |
+ mov rax,QWORD[((64+0))+rsi] |
+ mov r14,QWORD[((64+8))+rsi] |
+ mov r15,QWORD[((64+16))+rsi] |
+ mov r8,QWORD[((64+24))+rsi] |
+ lea rsi,[((64-0))+rsi] |
+ lea rdi,[64+rsp] |
+ call __ecp_nistz256_sqr_montq |
+ |
+ mov rax,QWORD[((0+0))+rsp] |
+ mov r14,QWORD[((8+0))+rsp] |
+ lea rsi,[((0+0))+rsp] |
+ mov r15,QWORD[((16+0))+rsp] |
+ mov r8,QWORD[((24+0))+rsp] |
+ lea rdi,[rsp] |
+ call __ecp_nistz256_sqr_montq |
+ |
+ mov rax,QWORD[32+rbx] |
+ mov r9,QWORD[((64+0))+rbx] |
+ mov r10,QWORD[((64+8))+rbx] |
+ mov r11,QWORD[((64+16))+rbx] |
+ mov r12,QWORD[((64+24))+rbx] |
+ lea rsi,[((64-0))+rbx] |
+ lea rbx,[32+rbx] |
+DB 102,72,15,126,215 |
+ call __ecp_nistz256_mul_montq |
+ call __ecp_nistz256_mul_by_2q |
+ |
+ mov r12,QWORD[((96+0))+rsp] |
+ mov r13,QWORD[((96+8))+rsp] |
+ lea rbx,[64+rsp] |
+ mov r8,QWORD[((96+16))+rsp] |
+ mov r9,QWORD[((96+24))+rsp] |
+ lea rdi,[32+rsp] |
+ call __ecp_nistz256_add_toq |
+ |
+ mov r12,QWORD[((96+0))+rsp] |
+ mov r13,QWORD[((96+8))+rsp] |
+ lea rbx,[64+rsp] |
+ mov r8,QWORD[((96+16))+rsp] |
+ mov r9,QWORD[((96+24))+rsp] |
+ lea rdi,[64+rsp] |
+ call __ecp_nistz256_sub_fromq |
+ |
+ mov rax,QWORD[((0+0))+rsp] |
+ mov r14,QWORD[((8+0))+rsp] |
+ lea rsi,[((0+0))+rsp] |
+ mov r15,QWORD[((16+0))+rsp] |
+ mov r8,QWORD[((24+0))+rsp] |
+DB 102,72,15,126,207 |
+ call __ecp_nistz256_sqr_montq |
+ xor r9,r9 |
+ mov rax,r12 |
+ add r12,-1 |
+ mov r10,r13 |
+ adc r13,rsi |
+ mov rcx,r14 |
+ adc r14,0 |
+ mov r8,r15 |
+ adc r15,rbp |
+ adc r9,0 |
+ xor rsi,rsi |
+ test rax,1 |
+ |
+ cmovz r12,rax |
+ cmovz r13,r10 |
+ cmovz r14,rcx |
+ cmovz r15,r8 |
+ cmovz r9,rsi |
+ |
+ mov rax,r13 |
+ shr r12,1 |
+ shl rax,63 |
+ mov r10,r14 |
+ shr r13,1 |
+ or r12,rax |
+ shl r10,63 |
+ mov rcx,r15 |
+ shr r14,1 |
+ or r13,r10 |
+ shl rcx,63 |
+ mov QWORD[rdi],r12 |
+ shr r15,1 |
+ mov QWORD[8+rdi],r13 |
+ shl r9,63 |
+ or r14,rcx |
+ or r15,r9 |
+ mov QWORD[16+rdi],r14 |
+ mov QWORD[24+rdi],r15 |
+ mov rax,QWORD[64+rsp] |
+ lea rbx,[64+rsp] |
+ mov r9,QWORD[((0+32))+rsp] |
+ mov r10,QWORD[((8+32))+rsp] |
+ lea rsi,[((0+32))+rsp] |
+ mov r11,QWORD[((16+32))+rsp] |
+ mov r12,QWORD[((24+32))+rsp] |
+ lea rdi,[32+rsp] |
+ call __ecp_nistz256_mul_montq |
+ |
+ lea rdi,[128+rsp] |
+ call __ecp_nistz256_mul_by_2q |
+ |
+ lea rbx,[32+rsp] |
+ lea rdi,[32+rsp] |
+ call __ecp_nistz256_add_toq |
+ |
+ mov rax,QWORD[96+rsp] |
+ lea rbx,[96+rsp] |
+ mov r9,QWORD[((0+0))+rsp] |
+ mov r10,QWORD[((8+0))+rsp] |
+ lea rsi,[((0+0))+rsp] |
+ mov r11,QWORD[((16+0))+rsp] |
+ mov r12,QWORD[((24+0))+rsp] |
+ lea rdi,[rsp] |
+ call __ecp_nistz256_mul_montq |
+ |
+ lea rdi,[128+rsp] |
+ call __ecp_nistz256_mul_by_2q |
+ |
+ mov rax,QWORD[((0+32))+rsp] |
+ mov r14,QWORD[((8+32))+rsp] |
+ lea rsi,[((0+32))+rsp] |
+ mov r15,QWORD[((16+32))+rsp] |
+ mov r8,QWORD[((24+32))+rsp] |
+DB 102,72,15,126,199 |
+ call __ecp_nistz256_sqr_montq |
+ |
+ lea rbx,[128+rsp] |
+ mov r8,r14 |
+ mov r9,r15 |
+ mov r14,rsi |
+ mov r15,rbp |
+ call __ecp_nistz256_sub_fromq |
+ |
+ mov rax,QWORD[((0+0))+rsp] |
+ mov rbp,QWORD[((0+8))+rsp] |
+ mov rcx,QWORD[((0+16))+rsp] |
+ mov r10,QWORD[((0+24))+rsp] |
+ lea rdi,[rsp] |
+ call __ecp_nistz256_subq |
+ |
+ mov rax,QWORD[32+rsp] |
+ lea rbx,[32+rsp] |
+ mov r14,r12 |
+ xor ecx,ecx |
+ mov QWORD[((0+0))+rsp],r12 |
+ mov r10,r13 |
+ mov QWORD[((0+8))+rsp],r13 |
+ cmovz r11,r8 |
+ mov QWORD[((0+16))+rsp],r8 |
+ lea rsi,[((0-0))+rsp] |
+ cmovz r12,r9 |
+ mov QWORD[((0+24))+rsp],r9 |
+ mov r9,r14 |
+ lea rdi,[rsp] |
+ call __ecp_nistz256_mul_montq |
+ |
+DB 102,72,15,126,203 |
+DB 102,72,15,126,207 |
+ call __ecp_nistz256_sub_fromq |
+ |
+ add rsp,32*5+8 |
+ pop r15 |
+ pop r14 |
+ pop r13 |
+ pop r12 |
+ pop rbx |
+ pop rbp |
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue |
+ mov rsi,QWORD[16+rsp] |
+ DB 0F3h,0C3h ;repret |
+$L$SEH_end_ecp_nistz256_point_double: |
+global ecp_nistz256_point_add |
+ |
+ALIGN 32 |
+ecp_nistz256_point_add: |
+ mov QWORD[8+rsp],rdi ;WIN64 prologue |
+ mov QWORD[16+rsp],rsi |
+ mov rax,rsp |
+$L$SEH_begin_ecp_nistz256_point_add: |
+ mov rdi,rcx |
+ mov rsi,rdx |
+ mov rdx,r8 |
+ |
+ |
+ push rbp |
+ push rbx |
+ push r12 |
+ push r13 |
+ push r14 |
+ push r15 |
+ sub rsp,32*18+8 |
+ |
+ movdqu xmm0,XMMWORD[rsi] |
+ movdqu xmm1,XMMWORD[16+rsi] |
+ movdqu xmm2,XMMWORD[32+rsi] |
+ movdqu xmm3,XMMWORD[48+rsi] |
+ movdqu xmm4,XMMWORD[64+rsi] |
+ movdqu xmm5,XMMWORD[80+rsi] |
+ mov rbx,rsi |
+ mov rsi,rdx |
+ movdqa XMMWORD[384+rsp],xmm0 |
+ movdqa XMMWORD[(384+16)+rsp],xmm1 |
+ por xmm1,xmm0 |
+ movdqa XMMWORD[416+rsp],xmm2 |
+ movdqa XMMWORD[(416+16)+rsp],xmm3 |
+ por xmm3,xmm2 |
+ movdqa XMMWORD[448+rsp],xmm4 |
+ movdqa XMMWORD[(448+16)+rsp],xmm5 |
+ por xmm3,xmm1 |
+ |
+ movdqu xmm0,XMMWORD[rsi] |
+ pshufd xmm5,xmm3,0xb1 |
+ movdqu xmm1,XMMWORD[16+rsi] |
+ movdqu xmm2,XMMWORD[32+rsi] |
+ por xmm5,xmm3 |
+ movdqu xmm3,XMMWORD[48+rsi] |
+ mov rax,QWORD[((64+0))+rsi] |
+ mov r14,QWORD[((64+8))+rsi] |
+ mov r15,QWORD[((64+16))+rsi] |
+ mov r8,QWORD[((64+24))+rsi] |
+ movdqa XMMWORD[480+rsp],xmm0 |
+ pshufd xmm4,xmm5,0x1e |
+ movdqa XMMWORD[(480+16)+rsp],xmm1 |
+ por xmm1,xmm0 |
+DB 102,72,15,110,199 |
+ movdqa XMMWORD[512+rsp],xmm2 |
+ movdqa XMMWORD[(512+16)+rsp],xmm3 |
+ por xmm3,xmm2 |
+ por xmm5,xmm4 |
+ pxor xmm4,xmm4 |
+ por xmm3,xmm1 |
+ |
+ lea rsi,[((64-0))+rsi] |
+ mov QWORD[((544+0))+rsp],rax |
+ mov QWORD[((544+8))+rsp],r14 |
+ mov QWORD[((544+16))+rsp],r15 |
+ mov QWORD[((544+24))+rsp],r8 |
+ lea rdi,[96+rsp] |
+ call __ecp_nistz256_sqr_montq |
+ |
+ pcmpeqd xmm5,xmm4 |
+ pshufd xmm4,xmm3,0xb1 |
+ por xmm4,xmm3 |
+ pshufd xmm5,xmm5,0 |
+ pshufd xmm3,xmm4,0x1e |
+ por xmm4,xmm3 |
+ pxor xmm3,xmm3 |
+ pcmpeqd xmm4,xmm3 |
+ pshufd xmm4,xmm4,0 |
+ mov rax,QWORD[((64+0))+rbx] |
+ mov r14,QWORD[((64+8))+rbx] |
+ mov r15,QWORD[((64+16))+rbx] |
+ mov r8,QWORD[((64+24))+rbx] |
+DB 102,72,15,110,203 |
+ |
+ lea rsi,[((64-0))+rbx] |
+ lea rdi,[32+rsp] |
+ call __ecp_nistz256_sqr_montq |
+ |
+ mov rax,QWORD[544+rsp] |
+ lea rbx,[544+rsp] |
+ mov r9,QWORD[((0+96))+rsp] |
+ mov r10,QWORD[((8+96))+rsp] |
+ lea rsi,[((0+96))+rsp] |
+ mov r11,QWORD[((16+96))+rsp] |
+ mov r12,QWORD[((24+96))+rsp] |
+ lea rdi,[224+rsp] |
+ call __ecp_nistz256_mul_montq |
+ |
+ mov rax,QWORD[448+rsp] |
+ lea rbx,[448+rsp] |
+ mov r9,QWORD[((0+32))+rsp] |
+ mov r10,QWORD[((8+32))+rsp] |
+ lea rsi,[((0+32))+rsp] |
+ mov r11,QWORD[((16+32))+rsp] |
+ mov r12,QWORD[((24+32))+rsp] |
+ lea rdi,[256+rsp] |
+ call __ecp_nistz256_mul_montq |
+ |
+ mov rax,QWORD[416+rsp] |
+ lea rbx,[416+rsp] |
+ mov r9,QWORD[((0+224))+rsp] |
+ mov r10,QWORD[((8+224))+rsp] |
+ lea rsi,[((0+224))+rsp] |
+ mov r11,QWORD[((16+224))+rsp] |
+ mov r12,QWORD[((24+224))+rsp] |
+ lea rdi,[224+rsp] |
+ call __ecp_nistz256_mul_montq |
+ |
+ mov rax,QWORD[512+rsp] |
+ lea rbx,[512+rsp] |
+ mov r9,QWORD[((0+256))+rsp] |
+ mov r10,QWORD[((8+256))+rsp] |
+ lea rsi,[((0+256))+rsp] |
+ mov r11,QWORD[((16+256))+rsp] |
+ mov r12,QWORD[((24+256))+rsp] |
+ lea rdi,[256+rsp] |
+ call __ecp_nistz256_mul_montq |
+ |
+ lea rbx,[224+rsp] |
+ lea rdi,[64+rsp] |
+ call __ecp_nistz256_sub_fromq |
+ |
+ or r12,r13 |
+ movdqa xmm2,xmm4 |
+ or r12,r8 |
+ or r12,r9 |
+ por xmm2,xmm5 |
+DB 102,73,15,110,220 |
+ |
+ mov rax,QWORD[384+rsp] |
+ lea rbx,[384+rsp] |
+ mov r9,QWORD[((0+96))+rsp] |
+ mov r10,QWORD[((8+96))+rsp] |
+ lea rsi,[((0+96))+rsp] |
+ mov r11,QWORD[((16+96))+rsp] |
+ mov r12,QWORD[((24+96))+rsp] |
+ lea rdi,[160+rsp] |
+ call __ecp_nistz256_mul_montq |
+ |
+ mov rax,QWORD[480+rsp] |
+ lea rbx,[480+rsp] |
+ mov r9,QWORD[((0+32))+rsp] |
+ mov r10,QWORD[((8+32))+rsp] |
+ lea rsi,[((0+32))+rsp] |
+ mov r11,QWORD[((16+32))+rsp] |
+ mov r12,QWORD[((24+32))+rsp] |
+ lea rdi,[192+rsp] |
+ call __ecp_nistz256_mul_montq |
+ |
+ lea rbx,[160+rsp] |
+ lea rdi,[rsp] |
+ call __ecp_nistz256_sub_fromq |
+ |
+ or r12,r13 |
+ or r12,r8 |
+ or r12,r9 |
+ |
+DB 0x3e |
+ jnz NEAR $L$add_proceedq |
+DB 102,73,15,126,208 |
+DB 102,73,15,126,217 |
+ test r8,r8 |
+ jnz NEAR $L$add_proceedq |
+ test r9,r9 |
+ jz NEAR $L$add_doubleq |
+ |
+DB 102,72,15,126,199 |
+ pxor xmm0,xmm0 |
+ movdqu XMMWORD[rdi],xmm0 |
+ movdqu XMMWORD[16+rdi],xmm0 |
+ movdqu XMMWORD[32+rdi],xmm0 |
+ movdqu XMMWORD[48+rdi],xmm0 |
+ movdqu XMMWORD[64+rdi],xmm0 |
+ movdqu XMMWORD[80+rdi],xmm0 |
+ jmp NEAR $L$add_doneq |
+ |
+ALIGN 32 |
+$L$add_doubleq: |
+DB 102,72,15,126,206 |
+DB 102,72,15,126,199 |
+ add rsp,416 |
+ jmp NEAR $L$point_double_shortcutq |
+ |
+ALIGN 32 |
+$L$add_proceedq: |
+ mov rax,QWORD[((0+64))+rsp] |
+ mov r14,QWORD[((8+64))+rsp] |
+ lea rsi,[((0+64))+rsp] |
+ mov r15,QWORD[((16+64))+rsp] |
+ mov r8,QWORD[((24+64))+rsp] |
+ lea rdi,[96+rsp] |
+ call __ecp_nistz256_sqr_montq |
+ |
+ mov rax,QWORD[448+rsp] |
+ lea rbx,[448+rsp] |
+ mov r9,QWORD[((0+0))+rsp] |
+ mov r10,QWORD[((8+0))+rsp] |
+ lea rsi,[((0+0))+rsp] |
+ mov r11,QWORD[((16+0))+rsp] |
+ mov r12,QWORD[((24+0))+rsp] |
+ lea rdi,[352+rsp] |
+ call __ecp_nistz256_mul_montq |
+ |
+ mov rax,QWORD[((0+0))+rsp] |
+ mov r14,QWORD[((8+0))+rsp] |
+ lea rsi,[((0+0))+rsp] |
+ mov r15,QWORD[((16+0))+rsp] |
+ mov r8,QWORD[((24+0))+rsp] |
+ lea rdi,[32+rsp] |
+ call __ecp_nistz256_sqr_montq |
+ |
+ mov rax,QWORD[544+rsp] |
+ lea rbx,[544+rsp] |
+ mov r9,QWORD[((0+352))+rsp] |
+ mov r10,QWORD[((8+352))+rsp] |
+ lea rsi,[((0+352))+rsp] |
+ mov r11,QWORD[((16+352))+rsp] |
+ mov r12,QWORD[((24+352))+rsp] |
+ lea rdi,[352+rsp] |
+ call __ecp_nistz256_mul_montq |
+ |
+ mov rax,QWORD[rsp] |
+ lea rbx,[rsp] |
+ mov r9,QWORD[((0+32))+rsp] |
+ mov r10,QWORD[((8+32))+rsp] |
+ lea rsi,[((0+32))+rsp] |
+ mov r11,QWORD[((16+32))+rsp] |
+ mov r12,QWORD[((24+32))+rsp] |
+ lea rdi,[128+rsp] |
+ call __ecp_nistz256_mul_montq |
+ |
+ mov rax,QWORD[160+rsp] |
+ lea rbx,[160+rsp] |
+ mov r9,QWORD[((0+32))+rsp] |
+ mov r10,QWORD[((8+32))+rsp] |
+ lea rsi,[((0+32))+rsp] |
+ mov r11,QWORD[((16+32))+rsp] |
+ mov r12,QWORD[((24+32))+rsp] |
+ lea rdi,[192+rsp] |
+ call __ecp_nistz256_mul_montq |
+ |
+ |
+ |
+ |
+ add r12,r12 |
+ lea rsi,[96+rsp] |
+ adc r13,r13 |
+ mov rax,r12 |
+ adc r8,r8 |
+ adc r9,r9 |
+ mov rbp,r13 |
+ sbb r11,r11 |
+ |
+ sub r12,-1 |
+ mov rcx,r8 |
+ sbb r13,r14 |
+ sbb r8,0 |
+ mov r10,r9 |
+ sbb r9,r15 |
+ test r11,r11 |
+ |
+ cmovz r12,rax |
+ mov rax,QWORD[rsi] |
+ cmovz r13,rbp |
+ mov rbp,QWORD[8+rsi] |
+ cmovz r8,rcx |
+ mov rcx,QWORD[16+rsi] |
+ cmovz r9,r10 |
+ mov r10,QWORD[24+rsi] |
+ |
+ call __ecp_nistz256_subq |
+ |
+ lea rbx,[128+rsp] |
+ lea rdi,[288+rsp] |
+ call __ecp_nistz256_sub_fromq |
+ |
+ mov rax,QWORD[((192+0))+rsp] |
+ mov rbp,QWORD[((192+8))+rsp] |
+ mov rcx,QWORD[((192+16))+rsp] |
+ mov r10,QWORD[((192+24))+rsp] |
+ lea rdi,[320+rsp] |
+ |
+ call __ecp_nistz256_subq |
+ |
+ mov QWORD[rdi],r12 |
+ mov QWORD[8+rdi],r13 |
+ mov QWORD[16+rdi],r8 |
+ mov QWORD[24+rdi],r9 |
+ mov rax,QWORD[128+rsp] |
+ lea rbx,[128+rsp] |
+ mov r9,QWORD[((0+224))+rsp] |
+ mov r10,QWORD[((8+224))+rsp] |
+ lea rsi,[((0+224))+rsp] |
+ mov r11,QWORD[((16+224))+rsp] |
+ mov r12,QWORD[((24+224))+rsp] |
+ lea rdi,[256+rsp] |
+ call __ecp_nistz256_mul_montq |
+ |
+ mov rax,QWORD[320+rsp] |
+ lea rbx,[320+rsp] |
+ mov r9,QWORD[((0+64))+rsp] |
+ mov r10,QWORD[((8+64))+rsp] |
+ lea rsi,[((0+64))+rsp] |
+ mov r11,QWORD[((16+64))+rsp] |
+ mov r12,QWORD[((24+64))+rsp] |
+ lea rdi,[320+rsp] |
+ call __ecp_nistz256_mul_montq |
+ |
+ lea rbx,[256+rsp] |
+ lea rdi,[320+rsp] |
+ call __ecp_nistz256_sub_fromq |
+ |
+DB 102,72,15,126,199 |
+ |
+ movdqa xmm0,xmm5 |
+ movdqa xmm1,xmm5 |
+ pandn xmm0,XMMWORD[352+rsp] |
+ movdqa xmm2,xmm5 |
+ pandn xmm1,XMMWORD[((352+16))+rsp] |
+ movdqa xmm3,xmm5 |
+ pand xmm2,XMMWORD[544+rsp] |
+ pand xmm3,XMMWORD[((544+16))+rsp] |
+ por xmm2,xmm0 |
+ por xmm3,xmm1 |
+ |
+ movdqa xmm0,xmm4 |
+ movdqa xmm1,xmm4 |
+ pandn xmm0,xmm2 |
+ movdqa xmm2,xmm4 |
+ pandn xmm1,xmm3 |
+ movdqa xmm3,xmm4 |
+ pand xmm2,XMMWORD[448+rsp] |
+ pand xmm3,XMMWORD[((448+16))+rsp] |
+ por xmm2,xmm0 |
+ por xmm3,xmm1 |
+ movdqu XMMWORD[64+rdi],xmm2 |
+ movdqu XMMWORD[80+rdi],xmm3 |
+ |
+ movdqa xmm0,xmm5 |
+ movdqa xmm1,xmm5 |
+ pandn xmm0,XMMWORD[288+rsp] |
+ movdqa xmm2,xmm5 |
+ pandn xmm1,XMMWORD[((288+16))+rsp] |
+ movdqa xmm3,xmm5 |
+ pand xmm2,XMMWORD[480+rsp] |
+ pand xmm3,XMMWORD[((480+16))+rsp] |
+ por xmm2,xmm0 |
+ por xmm3,xmm1 |
+ |
+ movdqa xmm0,xmm4 |
+ movdqa xmm1,xmm4 |
+ pandn xmm0,xmm2 |
+ movdqa xmm2,xmm4 |
+ pandn xmm1,xmm3 |
+ movdqa xmm3,xmm4 |
+ pand xmm2,XMMWORD[384+rsp] |
+ pand xmm3,XMMWORD[((384+16))+rsp] |
+ por xmm2,xmm0 |
+ por xmm3,xmm1 |
+ movdqu XMMWORD[rdi],xmm2 |
+ movdqu XMMWORD[16+rdi],xmm3 |
+ |
+ movdqa xmm0,xmm5 |
+ movdqa xmm1,xmm5 |
+ pandn xmm0,XMMWORD[320+rsp] |
+ movdqa xmm2,xmm5 |
+ pandn xmm1,XMMWORD[((320+16))+rsp] |
+ movdqa xmm3,xmm5 |
+ pand xmm2,XMMWORD[512+rsp] |
+ pand xmm3,XMMWORD[((512+16))+rsp] |
+ por xmm2,xmm0 |
+ por xmm3,xmm1 |
+ |
+ movdqa xmm0,xmm4 |
+ movdqa xmm1,xmm4 |
+ pandn xmm0,xmm2 |
+ movdqa xmm2,xmm4 |
+ pandn xmm1,xmm3 |
+ movdqa xmm3,xmm4 |
+ pand xmm2,XMMWORD[416+rsp] |
+ pand xmm3,XMMWORD[((416+16))+rsp] |
+ por xmm2,xmm0 |
+ por xmm3,xmm1 |
+ movdqu XMMWORD[32+rdi],xmm2 |
+ movdqu XMMWORD[48+rdi],xmm3 |
+ |
+$L$add_doneq: |
+ add rsp,32*18+8 |
+ pop r15 |
+ pop r14 |
+ pop r13 |
+ pop r12 |
+ pop rbx |
+ pop rbp |
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue |
+ mov rsi,QWORD[16+rsp] |
+ DB 0F3h,0C3h ;repret |
+$L$SEH_end_ecp_nistz256_point_add: |
+global ecp_nistz256_point_add_affine |
+ |
+ALIGN 32 |
+ecp_nistz256_point_add_affine: |
+ mov QWORD[8+rsp],rdi ;WIN64 prologue |
+ mov QWORD[16+rsp],rsi |
+ mov rax,rsp |
+$L$SEH_begin_ecp_nistz256_point_add_affine: |
+ mov rdi,rcx |
+ mov rsi,rdx |
+ mov rdx,r8 |
+ |
+ |
+ push rbp |
+ push rbx |
+ push r12 |
+ push r13 |
+ push r14 |
+ push r15 |
+ sub rsp,32*15+8 |
+ |
+ movdqu xmm0,XMMWORD[rsi] |
+ mov rbx,rdx |
+ movdqu xmm1,XMMWORD[16+rsi] |
+ movdqu xmm2,XMMWORD[32+rsi] |
+ movdqu xmm3,XMMWORD[48+rsi] |
+ movdqu xmm4,XMMWORD[64+rsi] |
+ movdqu xmm5,XMMWORD[80+rsi] |
+ mov rax,QWORD[((64+0))+rsi] |
+ mov r14,QWORD[((64+8))+rsi] |
+ mov r15,QWORD[((64+16))+rsi] |
+ mov r8,QWORD[((64+24))+rsi] |
+ movdqa XMMWORD[320+rsp],xmm0 |
+ movdqa XMMWORD[(320+16)+rsp],xmm1 |
+ por xmm1,xmm0 |
+ movdqa XMMWORD[352+rsp],xmm2 |
+ movdqa XMMWORD[(352+16)+rsp],xmm3 |
+ por xmm3,xmm2 |
+ movdqa XMMWORD[384+rsp],xmm4 |
+ movdqa XMMWORD[(384+16)+rsp],xmm5 |
+ por xmm3,xmm1 |
+ |
+ movdqu xmm0,XMMWORD[rbx] |
+ pshufd xmm5,xmm3,0xb1 |
+ movdqu xmm1,XMMWORD[16+rbx] |
+ movdqu xmm2,XMMWORD[32+rbx] |
+ por xmm5,xmm3 |
+ movdqu xmm3,XMMWORD[48+rbx] |
+ movdqa XMMWORD[416+rsp],xmm0 |
+ pshufd xmm4,xmm5,0x1e |
+ movdqa XMMWORD[(416+16)+rsp],xmm1 |
+ por xmm1,xmm0 |
+DB 102,72,15,110,199 |
+ movdqa XMMWORD[448+rsp],xmm2 |
+ movdqa XMMWORD[(448+16)+rsp],xmm3 |
+ por xmm3,xmm2 |
+ por xmm5,xmm4 |
+ pxor xmm4,xmm4 |
+ por xmm3,xmm1 |
+ |
+ lea rsi,[((64-0))+rsi] |
+ lea rdi,[32+rsp] |
+ call __ecp_nistz256_sqr_montq |
+ |
+ pcmpeqd xmm5,xmm4 |
+ pshufd xmm4,xmm3,0xb1 |
+ mov rax,QWORD[rbx] |
+ |
+ mov r9,r12 |
+ por xmm4,xmm3 |
+ pshufd xmm5,xmm5,0 |
+ pshufd xmm3,xmm4,0x1e |
+ mov r10,r13 |
+ por xmm4,xmm3 |
+ pxor xmm3,xmm3 |
+ mov r11,r14 |
+ pcmpeqd xmm4,xmm3 |
+ pshufd xmm4,xmm4,0 |
+ |
+ lea rsi,[((32-0))+rsp] |
+ mov r12,r15 |
+ lea rdi,[rsp] |
+ call __ecp_nistz256_mul_montq |
+ |
+ lea rbx,[320+rsp] |
+ lea rdi,[64+rsp] |
+ call __ecp_nistz256_sub_fromq |
+ |
+ mov rax,QWORD[384+rsp] |
+ lea rbx,[384+rsp] |
+ mov r9,QWORD[((0+32))+rsp] |
+ mov r10,QWORD[((8+32))+rsp] |
+ lea rsi,[((0+32))+rsp] |
+ mov r11,QWORD[((16+32))+rsp] |
+ mov r12,QWORD[((24+32))+rsp] |
+ lea rdi,[32+rsp] |
+ call __ecp_nistz256_mul_montq |
+ |
+ mov rax,QWORD[384+rsp] |
+ lea rbx,[384+rsp] |
+ mov r9,QWORD[((0+64))+rsp] |
+ mov r10,QWORD[((8+64))+rsp] |
+ lea rsi,[((0+64))+rsp] |
+ mov r11,QWORD[((16+64))+rsp] |
+ mov r12,QWORD[((24+64))+rsp] |
+ lea rdi,[288+rsp] |
+ call __ecp_nistz256_mul_montq |
+ |
+ mov rax,QWORD[448+rsp] |
+ lea rbx,[448+rsp] |
+ mov r9,QWORD[((0+32))+rsp] |
+ mov r10,QWORD[((8+32))+rsp] |
+ lea rsi,[((0+32))+rsp] |
+ mov r11,QWORD[((16+32))+rsp] |
+ mov r12,QWORD[((24+32))+rsp] |
+ lea rdi,[32+rsp] |
+ call __ecp_nistz256_mul_montq |
+ |
+ lea rbx,[352+rsp] |
+ lea rdi,[96+rsp] |
+ call __ecp_nistz256_sub_fromq |
+ |
+ mov rax,QWORD[((0+64))+rsp] |
+ mov r14,QWORD[((8+64))+rsp] |
+ lea rsi,[((0+64))+rsp] |
+ mov r15,QWORD[((16+64))+rsp] |
+ mov r8,QWORD[((24+64))+rsp] |
+ lea rdi,[128+rsp] |
+ call __ecp_nistz256_sqr_montq |
+ |
+ mov rax,QWORD[((0+96))+rsp] |
+ mov r14,QWORD[((8+96))+rsp] |
+ lea rsi,[((0+96))+rsp] |
+ mov r15,QWORD[((16+96))+rsp] |
+ mov r8,QWORD[((24+96))+rsp] |
+ lea rdi,[192+rsp] |
+ call __ecp_nistz256_sqr_montq |
+ |
+ mov rax,QWORD[128+rsp] |
+ lea rbx,[128+rsp] |
+ mov r9,QWORD[((0+64))+rsp] |
+ mov r10,QWORD[((8+64))+rsp] |
+ lea rsi,[((0+64))+rsp] |
+ mov r11,QWORD[((16+64))+rsp] |
+ mov r12,QWORD[((24+64))+rsp] |
+ lea rdi,[160+rsp] |
+ call __ecp_nistz256_mul_montq |
+ |
+ mov rax,QWORD[320+rsp] |
+ lea rbx,[320+rsp] |
+ mov r9,QWORD[((0+128))+rsp] |
+ mov r10,QWORD[((8+128))+rsp] |
+ lea rsi,[((0+128))+rsp] |
+ mov r11,QWORD[((16+128))+rsp] |
+ mov r12,QWORD[((24+128))+rsp] |
+ lea rdi,[rsp] |
+ call __ecp_nistz256_mul_montq |
+ |
+ |
+ |
+ |
+ add r12,r12 |
+ lea rsi,[192+rsp] |
+ adc r13,r13 |
+ mov rax,r12 |
+ adc r8,r8 |
+ adc r9,r9 |
+ mov rbp,r13 |
+ sbb r11,r11 |
+ |
+ sub r12,-1 |
+ mov rcx,r8 |
+ sbb r13,r14 |
+ sbb r8,0 |
+ mov r10,r9 |
+ sbb r9,r15 |
+ test r11,r11 |
+ |
+ cmovz r12,rax |
+ mov rax,QWORD[rsi] |
+ cmovz r13,rbp |
+ mov rbp,QWORD[8+rsi] |
+ cmovz r8,rcx |
+ mov rcx,QWORD[16+rsi] |
+ cmovz r9,r10 |
+ mov r10,QWORD[24+rsi] |
+ |
+ call __ecp_nistz256_subq |
+ |
+ lea rbx,[160+rsp] |
+ lea rdi,[224+rsp] |
+ call __ecp_nistz256_sub_fromq |
+ |
+ mov rax,QWORD[((0+0))+rsp] |
+ mov rbp,QWORD[((0+8))+rsp] |
+ mov rcx,QWORD[((0+16))+rsp] |
+ mov r10,QWORD[((0+24))+rsp] |
+ lea rdi,[64+rsp] |
+ |
+ call __ecp_nistz256_subq |
+ |
+ mov QWORD[rdi],r12 |
+ mov QWORD[8+rdi],r13 |
+ mov QWORD[16+rdi],r8 |
+ mov QWORD[24+rdi],r9 |
+ mov rax,QWORD[352+rsp] |
+ lea rbx,[352+rsp] |
+ mov r9,QWORD[((0+160))+rsp] |
+ mov r10,QWORD[((8+160))+rsp] |
+ lea rsi,[((0+160))+rsp] |
+ mov r11,QWORD[((16+160))+rsp] |
+ mov r12,QWORD[((24+160))+rsp] |
+ lea rdi,[32+rsp] |
+ call __ecp_nistz256_mul_montq |
+ |
+ mov rax,QWORD[96+rsp] |
+ lea rbx,[96+rsp] |
+ mov r9,QWORD[((0+64))+rsp] |
+ mov r10,QWORD[((8+64))+rsp] |
+ lea rsi,[((0+64))+rsp] |
+ mov r11,QWORD[((16+64))+rsp] |
+ mov r12,QWORD[((24+64))+rsp] |
+ lea rdi,[64+rsp] |
+ call __ecp_nistz256_mul_montq |
+ |
+ lea rbx,[32+rsp] |
+ lea rdi,[256+rsp] |
+ call __ecp_nistz256_sub_fromq |
+ |
+DB 102,72,15,126,199 |
+ |
+ movdqa xmm0,xmm5 |
+ movdqa xmm1,xmm5 |
+ pandn xmm0,XMMWORD[288+rsp] |
+ movdqa xmm2,xmm5 |
+ pandn xmm1,XMMWORD[((288+16))+rsp] |
+ movdqa xmm3,xmm5 |
+ pand xmm2,XMMWORD[$L$ONE_mont] |
+ pand xmm3,XMMWORD[(($L$ONE_mont+16))] |
+ por xmm2,xmm0 |
+ por xmm3,xmm1 |
+ |
+ movdqa xmm0,xmm4 |
+ movdqa xmm1,xmm4 |
+ pandn xmm0,xmm2 |
+ movdqa xmm2,xmm4 |
+ pandn xmm1,xmm3 |
+ movdqa xmm3,xmm4 |
+ pand xmm2,XMMWORD[384+rsp] |
+ pand xmm3,XMMWORD[((384+16))+rsp] |
+ por xmm2,xmm0 |
+ por xmm3,xmm1 |
+ movdqu XMMWORD[64+rdi],xmm2 |
+ movdqu XMMWORD[80+rdi],xmm3 |
+ |
+ movdqa xmm0,xmm5 |
+ movdqa xmm1,xmm5 |
+ pandn xmm0,XMMWORD[224+rsp] |
+ movdqa xmm2,xmm5 |
+ pandn xmm1,XMMWORD[((224+16))+rsp] |
+ movdqa xmm3,xmm5 |
+ pand xmm2,XMMWORD[416+rsp] |
+ pand xmm3,XMMWORD[((416+16))+rsp] |
+ por xmm2,xmm0 |
+ por xmm3,xmm1 |
+ |
+ movdqa xmm0,xmm4 |
+ movdqa xmm1,xmm4 |
+ pandn xmm0,xmm2 |
+ movdqa xmm2,xmm4 |
+ pandn xmm1,xmm3 |
+ movdqa xmm3,xmm4 |
+ pand xmm2,XMMWORD[320+rsp] |
+ pand xmm3,XMMWORD[((320+16))+rsp] |
+ por xmm2,xmm0 |
+ por xmm3,xmm1 |
+ movdqu XMMWORD[rdi],xmm2 |
+ movdqu XMMWORD[16+rdi],xmm3 |
+ |
+ movdqa xmm0,xmm5 |
+ movdqa xmm1,xmm5 |
+ pandn xmm0,XMMWORD[256+rsp] |
+ movdqa xmm2,xmm5 |
+ pandn xmm1,XMMWORD[((256+16))+rsp] |
+ movdqa xmm3,xmm5 |
+ pand xmm2,XMMWORD[448+rsp] |
+ pand xmm3,XMMWORD[((448+16))+rsp] |
+ por xmm2,xmm0 |
+ por xmm3,xmm1 |
+ |
+ movdqa xmm0,xmm4 |
+ movdqa xmm1,xmm4 |
+ pandn xmm0,xmm2 |
+ movdqa xmm2,xmm4 |
+ pandn xmm1,xmm3 |
+ movdqa xmm3,xmm4 |
+ pand xmm2,XMMWORD[352+rsp] |
+ pand xmm3,XMMWORD[((352+16))+rsp] |
+ por xmm2,xmm0 |
+ por xmm3,xmm1 |
+ movdqu XMMWORD[32+rdi],xmm2 |
+ movdqu XMMWORD[48+rdi],xmm3 |
+ |
+ add rsp,32*15+8 |
+ pop r15 |
+ pop r14 |
+ pop r13 |
+ pop r12 |
+ pop rbx |
+ pop rbp |
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue |
+ mov rsi,QWORD[16+rsp] |
+ DB 0F3h,0C3h ;repret |
+$L$SEH_end_ecp_nistz256_point_add_affine: |