Index: third_party/boringssl/win-x86/crypto/bn/co-586.asm |
diff --git a/third_party/boringssl/win-x86/crypto/bn/co-586.asm b/third_party/boringssl/win-x86/crypto/bn/co-586.asm |
new file mode 100644 |
index 0000000000000000000000000000000000000000..5780dc841bf88876f8d8ce368a9c7de80b2b4500 |
--- /dev/null |
+++ b/third_party/boringssl/win-x86/crypto/bn/co-586.asm |
@@ -0,0 +1,1260 @@ |
+%ifidn __OUTPUT_FORMAT__,obj | ; pick the code section declaration that matches the assembler's output format
+section code use32 class=code align=64 |
+%elifidn __OUTPUT_FORMAT__,win32 |
+%ifdef __YASM_VERSION_ID__ |
+%if __YASM_VERSION_ID__ < 01010000h |
+%error yasm version 1.1.0 or later needed. |
+%endif |
+; Yasm automatically defines $@feat.00 itself and complains about redefining it, so the symbol is only defined below when the assembler is not Yasm. |
+; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html |
+%else |
+$@feat.00 equ 1 | ; NOTE(review): presumably the SafeSEH feature symbol described at the URL above - confirm
+%endif |
+section .text code align=64 |
+%else |
+section .text code | ; any other output format: plain .text code section
+%endif |
+global _bn_mul_comba8 | ; bn_mul_comba8(r, a, b): r[0..15] = a[0..7] * b[0..7] on 32-bit words (least significant word first), computed one result column at a time
+align 16 |
+_bn_mul_comba8: | ; stack arguments on entry: r = [esp+4], a = [esp+8], b = [esp+12]; ends with a plain ret, so the caller removes them
+L$_bn_mul_comba8_begin: | ; esi/edi/ebp/ebx are pushed here and popped before ret; eax, ecx, edx and the flags are clobbered
+ push esi |
+ mov esi,DWORD [12+esp] | ; esi = a (entry [esp+8], shifted by one push)
+ push edi |
+ mov edi,DWORD [20+esp] | ; edi = b (entry [esp+12], shifted by two pushes)
+ push ebp |
+ push ebx |
+ xor ebx,ebx |
+ mov eax,DWORD [esi] |
+ xor ecx,ecx |
+ mov edx,DWORD [edi] |
+ ; ################## Calculate word 0 (products a[i]*b[j] with i+j = 0); three-word accumulator low/mid/high = ebx/ecx/ebp |
+ xor ebp,ebp |
+ ; mul a[0]*b[0] |
+ mul edx |
+ add ebx,eax |
+ mov eax,DWORD [20+esp] | ; eax = r (entry [esp+4] after four pushes); reloaded from the stack for every store because all other registers are in use. mov does not touch CF, so the adc below still sees the carry from the add
+ adc ecx,edx |
+ mov edx,DWORD [edi] |
+ adc ebp,0 |
+ mov DWORD [eax],ebx |
+ mov eax,DWORD [4+esi] |
+ ; saved r[0] (eax and edx already hold the operands of the next product) |
+ ; ################## Calculate word 1 (products with i+j = 1); accumulator low/mid/high = ecx/ebp/ebx (roles rotate by one register each column) |
+ xor ebx,ebx |
+ ; mul a[1]*b[0] |
+ mul edx |
+ add ecx,eax |
+ mov eax,DWORD [esi] |
+ adc ebp,edx |
+ mov edx,DWORD [4+edi] |
+ adc ebx,0 |
+ ; mul a[0]*b[1] |
+ mul edx |
+ add ecx,eax |
+ mov eax,DWORD [20+esp] |
+ adc ebp,edx |
+ mov edx,DWORD [edi] |
+ adc ebx,0 |
+ mov DWORD [4+eax],ecx |
+ mov eax,DWORD [8+esi] |
+ ; saved r[1] |
+ ; ################## Calculate word 2 (products with i+j = 2); accumulator low/mid/high = ebp/ebx/ecx |
+ xor ecx,ecx |
+ ; mul a[2]*b[0] |
+ mul edx |
+ add ebp,eax |
+ mov eax,DWORD [4+esi] |
+ adc ebx,edx |
+ mov edx,DWORD [4+edi] |
+ adc ecx,0 |
+ ; mul a[1]*b[1] |
+ mul edx |
+ add ebp,eax |
+ mov eax,DWORD [esi] |
+ adc ebx,edx |
+ mov edx,DWORD [8+edi] |
+ adc ecx,0 |
+ ; mul a[0]*b[2] |
+ mul edx |
+ add ebp,eax |
+ mov eax,DWORD [20+esp] |
+ adc ebx,edx |
+ mov edx,DWORD [edi] |
+ adc ecx,0 |
+ mov DWORD [8+eax],ebp |
+ mov eax,DWORD [12+esi] |
+ ; saved r[2] |
+ ; ################## Calculate word 3 (products with i+j = 3); accumulator low/mid/high = ebx/ecx/ebp |
+ xor ebp,ebp |
+ ; mul a[3]*b[0] |
+ mul edx |
+ add ebx,eax |
+ mov eax,DWORD [8+esi] |
+ adc ecx,edx |
+ mov edx,DWORD [4+edi] |
+ adc ebp,0 |
+ ; mul a[2]*b[1] |
+ mul edx |
+ add ebx,eax |
+ mov eax,DWORD [4+esi] |
+ adc ecx,edx |
+ mov edx,DWORD [8+edi] |
+ adc ebp,0 |
+ ; mul a[1]*b[2] |
+ mul edx |
+ add ebx,eax |
+ mov eax,DWORD [esi] |
+ adc ecx,edx |
+ mov edx,DWORD [12+edi] |
+ adc ebp,0 |
+ ; mul a[0]*b[3] |
+ mul edx |
+ add ebx,eax |
+ mov eax,DWORD [20+esp] |
+ adc ecx,edx |
+ mov edx,DWORD [edi] |
+ adc ebp,0 |
+ mov DWORD [12+eax],ebx |
+ mov eax,DWORD [16+esi] |
+ ; saved r[3] |
+ ; ################## Calculate word 4 (products with i+j = 4); accumulator low/mid/high = ecx/ebp/ebx |
+ xor ebx,ebx |
+ ; mul a[4]*b[0] |
+ mul edx |
+ add ecx,eax |
+ mov eax,DWORD [12+esi] |
+ adc ebp,edx |
+ mov edx,DWORD [4+edi] |
+ adc ebx,0 |
+ ; mul a[3]*b[1] |
+ mul edx |
+ add ecx,eax |
+ mov eax,DWORD [8+esi] |
+ adc ebp,edx |
+ mov edx,DWORD [8+edi] |
+ adc ebx,0 |
+ ; mul a[2]*b[2] |
+ mul edx |
+ add ecx,eax |
+ mov eax,DWORD [4+esi] |
+ adc ebp,edx |
+ mov edx,DWORD [12+edi] |
+ adc ebx,0 |
+ ; mul a[1]*b[3] |
+ mul edx |
+ add ecx,eax |
+ mov eax,DWORD [esi] |
+ adc ebp,edx |
+ mov edx,DWORD [16+edi] |
+ adc ebx,0 |
+ ; mul a[0]*b[4] |
+ mul edx |
+ add ecx,eax |
+ mov eax,DWORD [20+esp] |
+ adc ebp,edx |
+ mov edx,DWORD [edi] |
+ adc ebx,0 |
+ mov DWORD [16+eax],ecx |
+ mov eax,DWORD [20+esi] |
+ ; saved r[4] |
+ ; ################## Calculate word 5 (products with i+j = 5); accumulator low/mid/high = ebp/ebx/ecx |
+ xor ecx,ecx |
+ ; mul a[5]*b[0] |
+ mul edx |
+ add ebp,eax |
+ mov eax,DWORD [16+esi] |
+ adc ebx,edx |
+ mov edx,DWORD [4+edi] |
+ adc ecx,0 |
+ ; mul a[4]*b[1] |
+ mul edx |
+ add ebp,eax |
+ mov eax,DWORD [12+esi] |
+ adc ebx,edx |
+ mov edx,DWORD [8+edi] |
+ adc ecx,0 |
+ ; mul a[3]*b[2] |
+ mul edx |
+ add ebp,eax |
+ mov eax,DWORD [8+esi] |
+ adc ebx,edx |
+ mov edx,DWORD [12+edi] |
+ adc ecx,0 |
+ ; mul a[2]*b[3] |
+ mul edx |
+ add ebp,eax |
+ mov eax,DWORD [4+esi] |
+ adc ebx,edx |
+ mov edx,DWORD [16+edi] |
+ adc ecx,0 |
+ ; mul a[1]*b[4] |
+ mul edx |
+ add ebp,eax |
+ mov eax,DWORD [esi] |
+ adc ebx,edx |
+ mov edx,DWORD [20+edi] |
+ adc ecx,0 |
+ ; mul a[0]*b[5] |
+ mul edx |
+ add ebp,eax |
+ mov eax,DWORD [20+esp] |
+ adc ebx,edx |
+ mov edx,DWORD [edi] |
+ adc ecx,0 |
+ mov DWORD [20+eax],ebp |
+ mov eax,DWORD [24+esi] |
+ ; saved r[5] |
+ ; ################## Calculate word 6 (products with i+j = 6); accumulator low/mid/high = ebx/ecx/ebp |
+ xor ebp,ebp |
+ ; mul a[6]*b[0] |
+ mul edx |
+ add ebx,eax |
+ mov eax,DWORD [20+esi] |
+ adc ecx,edx |
+ mov edx,DWORD [4+edi] |
+ adc ebp,0 |
+ ; mul a[5]*b[1] |
+ mul edx |
+ add ebx,eax |
+ mov eax,DWORD [16+esi] |
+ adc ecx,edx |
+ mov edx,DWORD [8+edi] |
+ adc ebp,0 |
+ ; mul a[4]*b[2] |
+ mul edx |
+ add ebx,eax |
+ mov eax,DWORD [12+esi] |
+ adc ecx,edx |
+ mov edx,DWORD [12+edi] |
+ adc ebp,0 |
+ ; mul a[3]*b[3] |
+ mul edx |
+ add ebx,eax |
+ mov eax,DWORD [8+esi] |
+ adc ecx,edx |
+ mov edx,DWORD [16+edi] |
+ adc ebp,0 |
+ ; mul a[2]*b[4] |
+ mul edx |
+ add ebx,eax |
+ mov eax,DWORD [4+esi] |
+ adc ecx,edx |
+ mov edx,DWORD [20+edi] |
+ adc ebp,0 |
+ ; mul a[1]*b[5] |
+ mul edx |
+ add ebx,eax |
+ mov eax,DWORD [esi] |
+ adc ecx,edx |
+ mov edx,DWORD [24+edi] |
+ adc ebp,0 |
+ ; mul a[0]*b[6] |
+ mul edx |
+ add ebx,eax |
+ mov eax,DWORD [20+esp] |
+ adc ecx,edx |
+ mov edx,DWORD [edi] |
+ adc ebp,0 |
+ mov DWORD [24+eax],ebx |
+ mov eax,DWORD [28+esi] |
+ ; saved r[6] |
+ ; ################## Calculate word 7 (products with i+j = 7, the longest column); accumulator low/mid/high = ecx/ebp/ebx |
+ xor ebx,ebx |
+ ; mul a[7]*b[0] |
+ mul edx |
+ add ecx,eax |
+ mov eax,DWORD [24+esi] |
+ adc ebp,edx |
+ mov edx,DWORD [4+edi] |
+ adc ebx,0 |
+ ; mul a[6]*b[1] |
+ mul edx |
+ add ecx,eax |
+ mov eax,DWORD [20+esi] |
+ adc ebp,edx |
+ mov edx,DWORD [8+edi] |
+ adc ebx,0 |
+ ; mul a[5]*b[2] |
+ mul edx |
+ add ecx,eax |
+ mov eax,DWORD [16+esi] |
+ adc ebp,edx |
+ mov edx,DWORD [12+edi] |
+ adc ebx,0 |
+ ; mul a[4]*b[3] |
+ mul edx |
+ add ecx,eax |
+ mov eax,DWORD [12+esi] |
+ adc ebp,edx |
+ mov edx,DWORD [16+edi] |
+ adc ebx,0 |
+ ; mul a[3]*b[4] |
+ mul edx |
+ add ecx,eax |
+ mov eax,DWORD [8+esi] |
+ adc ebp,edx |
+ mov edx,DWORD [20+edi] |
+ adc ebx,0 |
+ ; mul a[2]*b[5] |
+ mul edx |
+ add ecx,eax |
+ mov eax,DWORD [4+esi] |
+ adc ebp,edx |
+ mov edx,DWORD [24+edi] |
+ adc ebx,0 |
+ ; mul a[1]*b[6] |
+ mul edx |
+ add ecx,eax |
+ mov eax,DWORD [esi] |
+ adc ebp,edx |
+ mov edx,DWORD [28+edi] |
+ adc ebx,0 |
+ ; mul a[0]*b[7] |
+ mul edx |
+ add ecx,eax |
+ mov eax,DWORD [20+esp] |
+ adc ebp,edx |
+ mov edx,DWORD [4+edi] |
+ adc ebx,0 |
+ mov DWORD [28+eax],ecx |
+ mov eax,DWORD [28+esi] |
+ ; saved r[7] (from here on every column starts at a[7] and the starting b index rises by one per column) |
+ ; ################## Calculate word 8 (products with i+j = 8); accumulator low/mid/high = ebp/ebx/ecx |
+ xor ecx,ecx |
+ ; mul a[7]*b[1] |
+ mul edx |
+ add ebp,eax |
+ mov eax,DWORD [24+esi] |
+ adc ebx,edx |
+ mov edx,DWORD [8+edi] |
+ adc ecx,0 |
+ ; mul a[6]*b[2] |
+ mul edx |
+ add ebp,eax |
+ mov eax,DWORD [20+esi] |
+ adc ebx,edx |
+ mov edx,DWORD [12+edi] |
+ adc ecx,0 |
+ ; mul a[5]*b[3] |
+ mul edx |
+ add ebp,eax |
+ mov eax,DWORD [16+esi] |
+ adc ebx,edx |
+ mov edx,DWORD [16+edi] |
+ adc ecx,0 |
+ ; mul a[4]*b[4] |
+ mul edx |
+ add ebp,eax |
+ mov eax,DWORD [12+esi] |
+ adc ebx,edx |
+ mov edx,DWORD [20+edi] |
+ adc ecx,0 |
+ ; mul a[3]*b[5] |
+ mul edx |
+ add ebp,eax |
+ mov eax,DWORD [8+esi] |
+ adc ebx,edx |
+ mov edx,DWORD [24+edi] |
+ adc ecx,0 |
+ ; mul a[2]*b[6] |
+ mul edx |
+ add ebp,eax |
+ mov eax,DWORD [4+esi] |
+ adc ebx,edx |
+ mov edx,DWORD [28+edi] |
+ adc ecx,0 |
+ ; mul a[1]*b[7] |
+ mul edx |
+ add ebp,eax |
+ mov eax,DWORD [20+esp] |
+ adc ebx,edx |
+ mov edx,DWORD [8+edi] |
+ adc ecx,0 |
+ mov DWORD [32+eax],ebp |
+ mov eax,DWORD [28+esi] |
+ ; saved r[8] |
+ ; ################## Calculate word 9 (products with i+j = 9); accumulator low/mid/high = ebx/ecx/ebp |
+ xor ebp,ebp |
+ ; mul a[7]*b[2] |
+ mul edx |
+ add ebx,eax |
+ mov eax,DWORD [24+esi] |
+ adc ecx,edx |
+ mov edx,DWORD [12+edi] |
+ adc ebp,0 |
+ ; mul a[6]*b[3] |
+ mul edx |
+ add ebx,eax |
+ mov eax,DWORD [20+esi] |
+ adc ecx,edx |
+ mov edx,DWORD [16+edi] |
+ adc ebp,0 |
+ ; mul a[5]*b[4] |
+ mul edx |
+ add ebx,eax |
+ mov eax,DWORD [16+esi] |
+ adc ecx,edx |
+ mov edx,DWORD [20+edi] |
+ adc ebp,0 |
+ ; mul a[4]*b[5] |
+ mul edx |
+ add ebx,eax |
+ mov eax,DWORD [12+esi] |
+ adc ecx,edx |
+ mov edx,DWORD [24+edi] |
+ adc ebp,0 |
+ ; mul a[3]*b[6] |
+ mul edx |
+ add ebx,eax |
+ mov eax,DWORD [8+esi] |
+ adc ecx,edx |
+ mov edx,DWORD [28+edi] |
+ adc ebp,0 |
+ ; mul a[2]*b[7] |
+ mul edx |
+ add ebx,eax |
+ mov eax,DWORD [20+esp] |
+ adc ecx,edx |
+ mov edx,DWORD [12+edi] |
+ adc ebp,0 |
+ mov DWORD [36+eax],ebx |
+ mov eax,DWORD [28+esi] |
+ ; saved r[9] |
+ ; ################## Calculate word 10 (products with i+j = 10); accumulator low/mid/high = ecx/ebp/ebx |
+ xor ebx,ebx |
+ ; mul a[7]*b[3] |
+ mul edx |
+ add ecx,eax |
+ mov eax,DWORD [24+esi] |
+ adc ebp,edx |
+ mov edx,DWORD [16+edi] |
+ adc ebx,0 |
+ ; mul a[6]*b[4] |
+ mul edx |
+ add ecx,eax |
+ mov eax,DWORD [20+esi] |
+ adc ebp,edx |
+ mov edx,DWORD [20+edi] |
+ adc ebx,0 |
+ ; mul a[5]*b[5] |
+ mul edx |
+ add ecx,eax |
+ mov eax,DWORD [16+esi] |
+ adc ebp,edx |
+ mov edx,DWORD [24+edi] |
+ adc ebx,0 |
+ ; mul a[4]*b[6] |
+ mul edx |
+ add ecx,eax |
+ mov eax,DWORD [12+esi] |
+ adc ebp,edx |
+ mov edx,DWORD [28+edi] |
+ adc ebx,0 |
+ ; mul a[3]*b[7] |
+ mul edx |
+ add ecx,eax |
+ mov eax,DWORD [20+esp] |
+ adc ebp,edx |
+ mov edx,DWORD [16+edi] |
+ adc ebx,0 |
+ mov DWORD [40+eax],ecx |
+ mov eax,DWORD [28+esi] |
+ ; saved r[10] |
+ ; ################## Calculate word 11 (products with i+j = 11); accumulator low/mid/high = ebp/ebx/ecx |
+ xor ecx,ecx |
+ ; mul a[7]*b[4] |
+ mul edx |
+ add ebp,eax |
+ mov eax,DWORD [24+esi] |
+ adc ebx,edx |
+ mov edx,DWORD [20+edi] |
+ adc ecx,0 |
+ ; mul a[6]*b[5] |
+ mul edx |
+ add ebp,eax |
+ mov eax,DWORD [20+esi] |
+ adc ebx,edx |
+ mov edx,DWORD [24+edi] |
+ adc ecx,0 |
+ ; mul a[5]*b[6] |
+ mul edx |
+ add ebp,eax |
+ mov eax,DWORD [16+esi] |
+ adc ebx,edx |
+ mov edx,DWORD [28+edi] |
+ adc ecx,0 |
+ ; mul a[4]*b[7] |
+ mul edx |
+ add ebp,eax |
+ mov eax,DWORD [20+esp] |
+ adc ebx,edx |
+ mov edx,DWORD [20+edi] |
+ adc ecx,0 |
+ mov DWORD [44+eax],ebp |
+ mov eax,DWORD [28+esi] |
+ ; saved r[11] |
+ ; ################## Calculate word 12 (products with i+j = 12); accumulator low/mid/high = ebx/ecx/ebp |
+ xor ebp,ebp |
+ ; mul a[7]*b[5] |
+ mul edx |
+ add ebx,eax |
+ mov eax,DWORD [24+esi] |
+ adc ecx,edx |
+ mov edx,DWORD [24+edi] |
+ adc ebp,0 |
+ ; mul a[6]*b[6] |
+ mul edx |
+ add ebx,eax |
+ mov eax,DWORD [20+esi] |
+ adc ecx,edx |
+ mov edx,DWORD [28+edi] |
+ adc ebp,0 |
+ ; mul a[5]*b[7] |
+ mul edx |
+ add ebx,eax |
+ mov eax,DWORD [20+esp] |
+ adc ecx,edx |
+ mov edx,DWORD [24+edi] |
+ adc ebp,0 |
+ mov DWORD [48+eax],ebx |
+ mov eax,DWORD [28+esi] |
+ ; saved r[12] |
+ ; ################## Calculate word 13 (products with i+j = 13); accumulator low/mid/high = ecx/ebp/ebx |
+ xor ebx,ebx |
+ ; mul a[7]*b[6] |
+ mul edx |
+ add ecx,eax |
+ mov eax,DWORD [24+esi] |
+ adc ebp,edx |
+ mov edx,DWORD [28+edi] |
+ adc ebx,0 |
+ ; mul a[6]*b[7] |
+ mul edx |
+ add ecx,eax |
+ mov eax,DWORD [20+esp] |
+ adc ebp,edx |
+ mov edx,DWORD [28+edi] |
+ adc ebx,0 |
+ mov DWORD [52+eax],ecx |
+ mov eax,DWORD [28+esi] |
+ ; saved r[13] |
+ ; ################## Calculate word 14 (single product, i+j = 14); accumulator low/mid/high = ebp/ebx/ecx |
+ xor ecx,ecx |
+ ; mul a[7]*b[7] |
+ mul edx |
+ add ebp,eax |
+ mov eax,DWORD [20+esp] |
+ adc ebx,edx |
+ adc ecx,0 | ; carry out of the last column goes to ecx, which is never stored (a product of two 8-word values fits in 16 words)
+ mov DWORD [56+eax],ebp |
+ ; saved r[14] |
+ ; save r[15]: the mid accumulator word of the last column is the top word of the result |
+ mov DWORD [60+eax],ebx |
+ pop ebx | ; restore the saved registers in reverse push order
+ pop ebp |
+ pop edi |
+ pop esi |
+ ret |
+global _bn_mul_comba4 | ; bn_mul_comba4(r, a, b): r[0..7] = a[0..3] * b[0..3] on 32-bit words (least significant word first), computed one result column at a time
+align 16 |
+_bn_mul_comba4: | ; stack arguments on entry: r = [esp+4], a = [esp+8], b = [esp+12]; ends with a plain ret, so the caller removes them
+L$_bn_mul_comba4_begin: | ; esi/edi/ebp/ebx are pushed here and popped before ret; eax, ecx, edx and the flags are clobbered
+ push esi |
+ mov esi,DWORD [12+esp] | ; esi = a (entry [esp+8], shifted by one push)
+ push edi |
+ mov edi,DWORD [20+esp] | ; edi = b (entry [esp+12], shifted by two pushes)
+ push ebp |
+ push ebx |
+ xor ebx,ebx |
+ mov eax,DWORD [esi] |
+ xor ecx,ecx |
+ mov edx,DWORD [edi] |
+ ; ################## Calculate word 0 (products a[i]*b[j] with i+j = 0); three-word accumulator low/mid/high = ebx/ecx/ebp |
+ xor ebp,ebp |
+ ; mul a[0]*b[0] |
+ mul edx |
+ add ebx,eax |
+ mov eax,DWORD [20+esp] | ; eax = r (entry [esp+4] after four pushes), reloaded from the stack for every store; mov does not touch CF, so the adc below still sees the carry from the add
+ adc ecx,edx |
+ mov edx,DWORD [edi] |
+ adc ebp,0 |
+ mov DWORD [eax],ebx |
+ mov eax,DWORD [4+esi] |
+ ; saved r[0] (eax and edx already hold the operands of the next product) |
+ ; ################## Calculate word 1 (products with i+j = 1); accumulator low/mid/high = ecx/ebp/ebx (roles rotate by one register each column) |
+ xor ebx,ebx |
+ ; mul a[1]*b[0] |
+ mul edx |
+ add ecx,eax |
+ mov eax,DWORD [esi] |
+ adc ebp,edx |
+ mov edx,DWORD [4+edi] |
+ adc ebx,0 |
+ ; mul a[0]*b[1] |
+ mul edx |
+ add ecx,eax |
+ mov eax,DWORD [20+esp] |
+ adc ebp,edx |
+ mov edx,DWORD [edi] |
+ adc ebx,0 |
+ mov DWORD [4+eax],ecx |
+ mov eax,DWORD [8+esi] |
+ ; saved r[1] |
+ ; ################## Calculate word 2 (products with i+j = 2); accumulator low/mid/high = ebp/ebx/ecx |
+ xor ecx,ecx |
+ ; mul a[2]*b[0] |
+ mul edx |
+ add ebp,eax |
+ mov eax,DWORD [4+esi] |
+ adc ebx,edx |
+ mov edx,DWORD [4+edi] |
+ adc ecx,0 |
+ ; mul a[1]*b[1] |
+ mul edx |
+ add ebp,eax |
+ mov eax,DWORD [esi] |
+ adc ebx,edx |
+ mov edx,DWORD [8+edi] |
+ adc ecx,0 |
+ ; mul a[0]*b[2] |
+ mul edx |
+ add ebp,eax |
+ mov eax,DWORD [20+esp] |
+ adc ebx,edx |
+ mov edx,DWORD [edi] |
+ adc ecx,0 |
+ mov DWORD [8+eax],ebp |
+ mov eax,DWORD [12+esi] |
+ ; saved r[2] |
+ ; ################## Calculate word 3 (products with i+j = 3, the longest column); accumulator low/mid/high = ebx/ecx/ebp |
+ xor ebp,ebp |
+ ; mul a[3]*b[0] |
+ mul edx |
+ add ebx,eax |
+ mov eax,DWORD [8+esi] |
+ adc ecx,edx |
+ mov edx,DWORD [4+edi] |
+ adc ebp,0 |
+ ; mul a[2]*b[1] |
+ mul edx |
+ add ebx,eax |
+ mov eax,DWORD [4+esi] |
+ adc ecx,edx |
+ mov edx,DWORD [8+edi] |
+ adc ebp,0 |
+ ; mul a[1]*b[2] |
+ mul edx |
+ add ebx,eax |
+ mov eax,DWORD [esi] |
+ adc ecx,edx |
+ mov edx,DWORD [12+edi] |
+ adc ebp,0 |
+ ; mul a[0]*b[3] |
+ mul edx |
+ add ebx,eax |
+ mov eax,DWORD [20+esp] |
+ adc ecx,edx |
+ mov edx,DWORD [4+edi] |
+ adc ebp,0 |
+ mov DWORD [12+eax],ebx |
+ mov eax,DWORD [12+esi] |
+ ; saved r[3] (from here on every column starts at a[3] and the starting b index rises by one per column) |
+ ; ################## Calculate word 4 (products with i+j = 4); accumulator low/mid/high = ecx/ebp/ebx |
+ xor ebx,ebx |
+ ; mul a[3]*b[1] |
+ mul edx |
+ add ecx,eax |
+ mov eax,DWORD [8+esi] |
+ adc ebp,edx |
+ mov edx,DWORD [8+edi] |
+ adc ebx,0 |
+ ; mul a[2]*b[2] |
+ mul edx |
+ add ecx,eax |
+ mov eax,DWORD [4+esi] |
+ adc ebp,edx |
+ mov edx,DWORD [12+edi] |
+ adc ebx,0 |
+ ; mul a[1]*b[3] |
+ mul edx |
+ add ecx,eax |
+ mov eax,DWORD [20+esp] |
+ adc ebp,edx |
+ mov edx,DWORD [8+edi] |
+ adc ebx,0 |
+ mov DWORD [16+eax],ecx |
+ mov eax,DWORD [12+esi] |
+ ; saved r[4] |
+ ; ################## Calculate word 5 (products with i+j = 5); accumulator low/mid/high = ebp/ebx/ecx |
+ xor ecx,ecx |
+ ; mul a[3]*b[2] |
+ mul edx |
+ add ebp,eax |
+ mov eax,DWORD [8+esi] |
+ adc ebx,edx |
+ mov edx,DWORD [12+edi] |
+ adc ecx,0 |
+ ; mul a[2]*b[3] |
+ mul edx |
+ add ebp,eax |
+ mov eax,DWORD [20+esp] |
+ adc ebx,edx |
+ mov edx,DWORD [12+edi] |
+ adc ecx,0 |
+ mov DWORD [20+eax],ebp |
+ mov eax,DWORD [12+esi] |
+ ; saved r[5] |
+ ; ################## Calculate word 6 (single product, i+j = 6); accumulator low/mid/high = ebx/ecx/ebp |
+ xor ebp,ebp |
+ ; mul a[3]*b[3] |
+ mul edx |
+ add ebx,eax |
+ mov eax,DWORD [20+esp] |
+ adc ecx,edx |
+ adc ebp,0 | ; carry out of the last column goes to ebp, which is never stored (a product of two 4-word values fits in 8 words)
+ mov DWORD [24+eax],ebx |
+ ; saved r[6] |
+ ; save r[7]: the mid accumulator word of the last column is the top word of the result |
+ mov DWORD [28+eax],ecx |
+ pop ebx | ; restore the saved registers in reverse push order
+ pop ebp |
+ pop edi |
+ pop esi |
+ ret |
+global _bn_sqr_comba8 | ; bn_sqr_comba8(r, a): r[0..15] = a[0..7]^2 on 32-bit words (least significant word first), computed one result column at a time
+align 16 |
+_bn_sqr_comba8: | ; stack arguments on entry: r = [esp+4], a = [esp+8]; ends with a plain ret, so the caller removes them
+L$_bn_sqr_comba8_begin: | ; esi/edi/ebp/ebx are pushed here and popped before ret; eax, ecx, edx and the flags are clobbered
+ push esi |
+ push edi |
+ push ebp |
+ push ebx |
+ mov edi,DWORD [20+esp] | ; edi = r (entry [esp+4], shifted by four pushes)
+ mov esi,DWORD [24+esp] | ; esi = a (entry [esp+8], shifted by four pushes)
+ xor ebx,ebx |
+ xor ecx,ecx |
+ mov eax,DWORD [esi] |
+ ; ############### Calculate word 0 (products a[i]*a[j] with i+j = 0); three-word accumulator low/mid/high = ebx/ecx/ebp |
+ xor ebp,ebp |
+ ; sqr a[0]*a[0] (diagonal term: added once, no doubling) |
+ mul eax |
+ add ebx,eax |
+ adc ecx,edx |
+ mov edx,DWORD [esi] |
+ adc ebp,0 |
+ mov DWORD [edi],ebx |
+ mov eax,DWORD [4+esi] |
+ ; saved r[0] |
+ ; ############### Calculate word 1 (products with i+j = 1); accumulator low/mid/high = ecx/ebp/ebx (roles rotate by one register each column) |
+ xor ebx,ebx |
+ ; sqr a[1]*a[0] (cross term: it occurs twice in the square, so edx:eax is doubled and the bit shifted out is added to the high accumulator word) |
+ mul edx |
+ add eax,eax |
+ adc edx,edx |
+ adc ebx,0 |
+ add ecx,eax |
+ adc ebp,edx |
+ mov eax,DWORD [8+esi] |
+ adc ebx,0 |
+ mov DWORD [4+edi],ecx |
+ mov edx,DWORD [esi] |
+ ; saved r[1] |
+ ; ############### Calculate word 2 (products with i+j = 2); accumulator low/mid/high = ebp/ebx/ecx |
+ xor ecx,ecx |
+ ; sqr a[2]*a[0] |
+ mul edx |
+ add eax,eax |
+ adc edx,edx |
+ adc ecx,0 |
+ add ebp,eax |
+ adc ebx,edx |
+ mov eax,DWORD [4+esi] |
+ adc ecx,0 |
+ ; sqr a[1]*a[1] |
+ mul eax |
+ add ebp,eax |
+ adc ebx,edx |
+ mov edx,DWORD [esi] |
+ adc ecx,0 |
+ mov DWORD [8+edi],ebp |
+ mov eax,DWORD [12+esi] |
+ ; saved r[2] |
+ ; ############### Calculate word 3 (products with i+j = 3); accumulator low/mid/high = ebx/ecx/ebp |
+ xor ebp,ebp |
+ ; sqr a[3]*a[0] |
+ mul edx |
+ add eax,eax |
+ adc edx,edx |
+ adc ebp,0 |
+ add ebx,eax |
+ adc ecx,edx |
+ mov eax,DWORD [8+esi] |
+ adc ebp,0 |
+ mov edx,DWORD [4+esi] |
+ ; sqr a[2]*a[1] |
+ mul edx |
+ add eax,eax |
+ adc edx,edx |
+ adc ebp,0 |
+ add ebx,eax |
+ adc ecx,edx |
+ mov eax,DWORD [16+esi] |
+ adc ebp,0 |
+ mov DWORD [12+edi],ebx |
+ mov edx,DWORD [esi] |
+ ; saved r[3] |
+ ; ############### Calculate word 4 (products with i+j = 4); accumulator low/mid/high = ecx/ebp/ebx |
+ xor ebx,ebx |
+ ; sqr a[4]*a[0] |
+ mul edx |
+ add eax,eax |
+ adc edx,edx |
+ adc ebx,0 |
+ add ecx,eax |
+ adc ebp,edx |
+ mov eax,DWORD [12+esi] |
+ adc ebx,0 |
+ mov edx,DWORD [4+esi] |
+ ; sqr a[3]*a[1] |
+ mul edx |
+ add eax,eax |
+ adc edx,edx |
+ adc ebx,0 |
+ add ecx,eax |
+ adc ebp,edx |
+ mov eax,DWORD [8+esi] |
+ adc ebx,0 |
+ ; sqr a[2]*a[2] |
+ mul eax |
+ add ecx,eax |
+ adc ebp,edx |
+ mov edx,DWORD [esi] |
+ adc ebx,0 |
+ mov DWORD [16+edi],ecx |
+ mov eax,DWORD [20+esi] |
+ ; saved r[4] |
+ ; ############### Calculate word 5 (products with i+j = 5); accumulator low/mid/high = ebp/ebx/ecx |
+ xor ecx,ecx |
+ ; sqr a[5]*a[0] |
+ mul edx |
+ add eax,eax |
+ adc edx,edx |
+ adc ecx,0 |
+ add ebp,eax |
+ adc ebx,edx |
+ mov eax,DWORD [16+esi] |
+ adc ecx,0 |
+ mov edx,DWORD [4+esi] |
+ ; sqr a[4]*a[1] |
+ mul edx |
+ add eax,eax |
+ adc edx,edx |
+ adc ecx,0 |
+ add ebp,eax |
+ adc ebx,edx |
+ mov eax,DWORD [12+esi] |
+ adc ecx,0 |
+ mov edx,DWORD [8+esi] |
+ ; sqr a[3]*a[2] |
+ mul edx |
+ add eax,eax |
+ adc edx,edx |
+ adc ecx,0 |
+ add ebp,eax |
+ adc ebx,edx |
+ mov eax,DWORD [24+esi] |
+ adc ecx,0 |
+ mov DWORD [20+edi],ebp |
+ mov edx,DWORD [esi] |
+ ; saved r[5] |
+ ; ############### Calculate word 6 (products with i+j = 6); accumulator low/mid/high = ebx/ecx/ebp |
+ xor ebp,ebp |
+ ; sqr a[6]*a[0] |
+ mul edx |
+ add eax,eax |
+ adc edx,edx |
+ adc ebp,0 |
+ add ebx,eax |
+ adc ecx,edx |
+ mov eax,DWORD [20+esi] |
+ adc ebp,0 |
+ mov edx,DWORD [4+esi] |
+ ; sqr a[5]*a[1] |
+ mul edx |
+ add eax,eax |
+ adc edx,edx |
+ adc ebp,0 |
+ add ebx,eax |
+ adc ecx,edx |
+ mov eax,DWORD [16+esi] |
+ adc ebp,0 |
+ mov edx,DWORD [8+esi] |
+ ; sqr a[4]*a[2] |
+ mul edx |
+ add eax,eax |
+ adc edx,edx |
+ adc ebp,0 |
+ add ebx,eax |
+ adc ecx,edx |
+ mov eax,DWORD [12+esi] |
+ adc ebp,0 |
+ ; sqr a[3]*a[3] |
+ mul eax |
+ add ebx,eax |
+ adc ecx,edx |
+ mov edx,DWORD [esi] |
+ adc ebp,0 |
+ mov DWORD [24+edi],ebx |
+ mov eax,DWORD [28+esi] |
+ ; saved r[6] |
+ ; ############### Calculate word 7 (products with i+j = 7); accumulator low/mid/high = ecx/ebp/ebx |
+ xor ebx,ebx |
+ ; sqr a[7]*a[0] |
+ mul edx |
+ add eax,eax |
+ adc edx,edx |
+ adc ebx,0 |
+ add ecx,eax |
+ adc ebp,edx |
+ mov eax,DWORD [24+esi] |
+ adc ebx,0 |
+ mov edx,DWORD [4+esi] |
+ ; sqr a[6]*a[1] |
+ mul edx |
+ add eax,eax |
+ adc edx,edx |
+ adc ebx,0 |
+ add ecx,eax |
+ adc ebp,edx |
+ mov eax,DWORD [20+esi] |
+ adc ebx,0 |
+ mov edx,DWORD [8+esi] |
+ ; sqr a[5]*a[2] |
+ mul edx |
+ add eax,eax |
+ adc edx,edx |
+ adc ebx,0 |
+ add ecx,eax |
+ adc ebp,edx |
+ mov eax,DWORD [16+esi] |
+ adc ebx,0 |
+ mov edx,DWORD [12+esi] |
+ ; sqr a[4]*a[3] |
+ mul edx |
+ add eax,eax |
+ adc edx,edx |
+ adc ebx,0 |
+ add ecx,eax |
+ adc ebp,edx |
+ mov eax,DWORD [28+esi] |
+ adc ebx,0 |
+ mov DWORD [28+edi],ecx |
+ mov edx,DWORD [4+esi] |
+ ; saved r[7] |
+ ; ############### Calculate word 8 (products with i+j = 8); accumulator low/mid/high = ebp/ebx/ecx |
+ xor ecx,ecx |
+ ; sqr a[7]*a[1] |
+ mul edx |
+ add eax,eax |
+ adc edx,edx |
+ adc ecx,0 |
+ add ebp,eax |
+ adc ebx,edx |
+ mov eax,DWORD [24+esi] |
+ adc ecx,0 |
+ mov edx,DWORD [8+esi] |
+ ; sqr a[6]*a[2] |
+ mul edx |
+ add eax,eax |
+ adc edx,edx |
+ adc ecx,0 |
+ add ebp,eax |
+ adc ebx,edx |
+ mov eax,DWORD [20+esi] |
+ adc ecx,0 |
+ mov edx,DWORD [12+esi] |
+ ; sqr a[5]*a[3] |
+ mul edx |
+ add eax,eax |
+ adc edx,edx |
+ adc ecx,0 |
+ add ebp,eax |
+ adc ebx,edx |
+ mov eax,DWORD [16+esi] |
+ adc ecx,0 |
+ ; sqr a[4]*a[4] |
+ mul eax |
+ add ebp,eax |
+ adc ebx,edx |
+ mov edx,DWORD [8+esi] |
+ adc ecx,0 |
+ mov DWORD [32+edi],ebp |
+ mov eax,DWORD [28+esi] |
+ ; saved r[8] |
+ ; ############### Calculate word 9 (products with i+j = 9); accumulator low/mid/high = ebx/ecx/ebp |
+ xor ebp,ebp |
+ ; sqr a[7]*a[2] |
+ mul edx |
+ add eax,eax |
+ adc edx,edx |
+ adc ebp,0 |
+ add ebx,eax |
+ adc ecx,edx |
+ mov eax,DWORD [24+esi] |
+ adc ebp,0 |
+ mov edx,DWORD [12+esi] |
+ ; sqr a[6]*a[3] |
+ mul edx |
+ add eax,eax |
+ adc edx,edx |
+ adc ebp,0 |
+ add ebx,eax |
+ adc ecx,edx |
+ mov eax,DWORD [20+esi] |
+ adc ebp,0 |
+ mov edx,DWORD [16+esi] |
+ ; sqr a[5]*a[4] |
+ mul edx |
+ add eax,eax |
+ adc edx,edx |
+ adc ebp,0 |
+ add ebx,eax |
+ adc ecx,edx |
+ mov eax,DWORD [28+esi] |
+ adc ebp,0 |
+ mov DWORD [36+edi],ebx |
+ mov edx,DWORD [12+esi] |
+ ; saved r[9] |
+ ; ############### Calculate word 10 (products with i+j = 10); accumulator low/mid/high = ecx/ebp/ebx |
+ xor ebx,ebx |
+ ; sqr a[7]*a[3] |
+ mul edx |
+ add eax,eax |
+ adc edx,edx |
+ adc ebx,0 |
+ add ecx,eax |
+ adc ebp,edx |
+ mov eax,DWORD [24+esi] |
+ adc ebx,0 |
+ mov edx,DWORD [16+esi] |
+ ; sqr a[6]*a[4] |
+ mul edx |
+ add eax,eax |
+ adc edx,edx |
+ adc ebx,0 |
+ add ecx,eax |
+ adc ebp,edx |
+ mov eax,DWORD [20+esi] |
+ adc ebx,0 |
+ ; sqr a[5]*a[5] |
+ mul eax |
+ add ecx,eax |
+ adc ebp,edx |
+ mov edx,DWORD [16+esi] |
+ adc ebx,0 |
+ mov DWORD [40+edi],ecx |
+ mov eax,DWORD [28+esi] |
+ ; saved r[10] |
+ ; ############### Calculate word 11 (products with i+j = 11); accumulator low/mid/high = ebp/ebx/ecx |
+ xor ecx,ecx |
+ ; sqr a[7]*a[4] |
+ mul edx |
+ add eax,eax |
+ adc edx,edx |
+ adc ecx,0 |
+ add ebp,eax |
+ adc ebx,edx |
+ mov eax,DWORD [24+esi] |
+ adc ecx,0 |
+ mov edx,DWORD [20+esi] |
+ ; sqr a[6]*a[5] |
+ mul edx |
+ add eax,eax |
+ adc edx,edx |
+ adc ecx,0 |
+ add ebp,eax |
+ adc ebx,edx |
+ mov eax,DWORD [28+esi] |
+ adc ecx,0 |
+ mov DWORD [44+edi],ebp |
+ mov edx,DWORD [20+esi] |
+ ; saved r[11] |
+ ; ############### Calculate word 12 (products with i+j = 12); accumulator low/mid/high = ebx/ecx/ebp |
+ xor ebp,ebp |
+ ; sqr a[7]*a[5] |
+ mul edx |
+ add eax,eax |
+ adc edx,edx |
+ adc ebp,0 |
+ add ebx,eax |
+ adc ecx,edx |
+ mov eax,DWORD [24+esi] |
+ adc ebp,0 |
+ ; sqr a[6]*a[6] |
+ mul eax |
+ add ebx,eax |
+ adc ecx,edx |
+ mov edx,DWORD [24+esi] |
+ adc ebp,0 |
+ mov DWORD [48+edi],ebx |
+ mov eax,DWORD [28+esi] |
+ ; saved r[12] |
+ ; ############### Calculate word 13 (products with i+j = 13); accumulator low/mid/high = ecx/ebp/ebx |
+ xor ebx,ebx |
+ ; sqr a[7]*a[6] |
+ mul edx |
+ add eax,eax |
+ adc edx,edx |
+ adc ebx,0 |
+ add ecx,eax |
+ adc ebp,edx |
+ mov eax,DWORD [28+esi] |
+ adc ebx,0 |
+ mov DWORD [52+edi],ecx |
+ ; saved r[13] |
+ ; ############### Calculate word 14 (single diagonal product, i+j = 14); accumulator low/mid/high = ebp/ebx/ecx |
+ xor ecx,ecx |
+ ; sqr a[7]*a[7] |
+ mul eax |
+ add ebp,eax |
+ adc ebx,edx |
+ adc ecx,0 | ; carry out of the last column goes to ecx, which is never stored (the square of an 8-word value fits in 16 words)
+ mov DWORD [56+edi],ebp |
+ ; saved r[14]; the store below writes r[15] from the mid accumulator word of the last column |
+ mov DWORD [60+edi],ebx |
+ pop ebx | ; restore the saved registers in reverse push order
+ pop ebp |
+ pop edi |
+ pop esi |
+ ret |
+global _bn_sqr_comba4 | ; bn_sqr_comba4(r, a): r[0..7] = a[0..3]^2 on 32-bit words (least significant word first), computed one result column at a time
+align 16 |
+_bn_sqr_comba4: | ; stack arguments on entry: r = [esp+4], a = [esp+8]; ends with a plain ret, so the caller removes them
+L$_bn_sqr_comba4_begin: | ; esi/edi/ebp/ebx are pushed here and popped before ret; eax, ecx, edx and the flags are clobbered
+ push esi |
+ push edi |
+ push ebp |
+ push ebx |
+ mov edi,DWORD [20+esp] | ; edi = r (entry [esp+4], shifted by four pushes)
+ mov esi,DWORD [24+esp] | ; esi = a (entry [esp+8], shifted by four pushes)
+ xor ebx,ebx |
+ xor ecx,ecx |
+ mov eax,DWORD [esi] |
+ ; ############### Calculate word 0 (products a[i]*a[j] with i+j = 0); three-word accumulator low/mid/high = ebx/ecx/ebp |
+ xor ebp,ebp |
+ ; sqr a[0]*a[0] (diagonal term: added once, no doubling) |
+ mul eax |
+ add ebx,eax |
+ adc ecx,edx |
+ mov edx,DWORD [esi] |
+ adc ebp,0 |
+ mov DWORD [edi],ebx |
+ mov eax,DWORD [4+esi] |
+ ; saved r[0] |
+ ; ############### Calculate word 1 (products with i+j = 1); accumulator low/mid/high = ecx/ebp/ebx (roles rotate by one register each column) |
+ xor ebx,ebx |
+ ; sqr a[1]*a[0] (cross term: it occurs twice in the square, so edx:eax is doubled and the bit shifted out is added to the high accumulator word) |
+ mul edx |
+ add eax,eax |
+ adc edx,edx |
+ adc ebx,0 |
+ add ecx,eax |
+ adc ebp,edx |
+ mov eax,DWORD [8+esi] |
+ adc ebx,0 |
+ mov DWORD [4+edi],ecx |
+ mov edx,DWORD [esi] |
+ ; saved r[1] |
+ ; ############### Calculate word 2 (products with i+j = 2); accumulator low/mid/high = ebp/ebx/ecx |
+ xor ecx,ecx |
+ ; sqr a[2]*a[0] |
+ mul edx |
+ add eax,eax |
+ adc edx,edx |
+ adc ecx,0 |
+ add ebp,eax |
+ adc ebx,edx |
+ mov eax,DWORD [4+esi] |
+ adc ecx,0 |
+ ; sqr a[1]*a[1] |
+ mul eax |
+ add ebp,eax |
+ adc ebx,edx |
+ mov edx,DWORD [esi] |
+ adc ecx,0 |
+ mov DWORD [8+edi],ebp |
+ mov eax,DWORD [12+esi] |
+ ; saved r[2] |
+ ; ############### Calculate word 3 (products with i+j = 3); accumulator low/mid/high = ebx/ecx/ebp |
+ xor ebp,ebp |
+ ; sqr a[3]*a[0] |
+ mul edx |
+ add eax,eax |
+ adc edx,edx |
+ adc ebp,0 |
+ add ebx,eax |
+ adc ecx,edx |
+ mov eax,DWORD [8+esi] |
+ adc ebp,0 |
+ mov edx,DWORD [4+esi] |
+ ; sqr a[2]*a[1] |
+ mul edx |
+ add eax,eax |
+ adc edx,edx |
+ adc ebp,0 |
+ add ebx,eax |
+ adc ecx,edx |
+ mov eax,DWORD [12+esi] |
+ adc ebp,0 |
+ mov DWORD [12+edi],ebx |
+ mov edx,DWORD [4+esi] |
+ ; saved r[3] |
+ ; ############### Calculate word 4 (products with i+j = 4); accumulator low/mid/high = ecx/ebp/ebx |
+ xor ebx,ebx |
+ ; sqr a[3]*a[1] |
+ mul edx |
+ add eax,eax |
+ adc edx,edx |
+ adc ebx,0 |
+ add ecx,eax |
+ adc ebp,edx |
+ mov eax,DWORD [8+esi] |
+ adc ebx,0 |
+ ; sqr a[2]*a[2] |
+ mul eax |
+ add ecx,eax |
+ adc ebp,edx |
+ mov edx,DWORD [8+esi] |
+ adc ebx,0 |
+ mov DWORD [16+edi],ecx |
+ mov eax,DWORD [12+esi] |
+ ; saved r[4] |
+ ; ############### Calculate word 5 (products with i+j = 5); accumulator low/mid/high = ebp/ebx/ecx |
+ xor ecx,ecx |
+ ; sqr a[3]*a[2] |
+ mul edx |
+ add eax,eax |
+ adc edx,edx |
+ adc ecx,0 |
+ add ebp,eax |
+ adc ebx,edx |
+ mov eax,DWORD [12+esi] |
+ adc ecx,0 |
+ mov DWORD [20+edi],ebp |
+ ; saved r[5] |
+ ; ############### Calculate word 6 (single diagonal product, i+j = 6); accumulator low/mid/high = ebx/ecx/ebp |
+ xor ebp,ebp |
+ ; sqr a[3]*a[3] |
+ mul eax |
+ add ebx,eax |
+ adc ecx,edx |
+ adc ebp,0 | ; carry out of the last column goes to ebp, which is never stored (the square of a 4-word value fits in 8 words)
+ mov DWORD [24+edi],ebx |
+ ; saved r[6]; the store below writes r[7] from the mid accumulator word of the last column |
+ mov DWORD [28+edi],ecx |
+ pop ebx | ; restore the saved registers in reverse push order
+ pop ebp |
+ pop edi |
+ pop esi |
+ ret |