OLD | NEW |
| (Empty) |
; Section setup: choose the code-section declaration that matches the
; assembler's output format (obj, win32, or anything else).
1 %ifidn __OUTPUT_FORMAT__,obj | |
2 section code use32 class=code align=64 | |
3 %elifidn __OUTPUT_FORMAT__,win32 | |
4 %ifdef __YASM_VERSION_ID__ | |
5 %if __YASM_VERSION_ID__ < 01010000h | |
6 %error yasm version 1.1.0 or later needed. | |
7 %endif | |
8 ; Yasm automatically includes @feat.00 and complains about redefining it. | |
9 ; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html | |
10 %else | |
; Not Yasm: the @feat.00 symbol has to be defined here explicitly.
11 $@feat.00 equ 1 | |
12 %endif | |
13 section .text code align=64 | |
14 %else | |
; Any other output format: plain .text code section, default alignment.
15 section .text code | |
16 %endif | |
; ---------------------------------------------------------------------
; _bn_mul_comba8: r[0..15] = a[0..7] * b[0..7], using 32-bit words.
;
; Stack arguments (dwords, addressed relative to esp after the pushes):
;   1st -> r   destination; 16 words are written at offsets 0..60
;   2nd -> a   kept in esi for the whole routine
;   3rd -> b   kept in edi for the whole routine
;
; The result is produced one word at a time.  For result word k every
; product a[i]*b[j] with i+j == k is added into a three-register
; accumulator; the lowest register is then stored as r[k] and the
; register roles rotate: (ebx,ecx,ebp) -> (ecx,ebp,ebx) -> (ebp,ebx,ecx).
; The loads for the next product are interleaved with the add/adc chain;
; mov does not modify the flags, so the carry chain is not disturbed.
;
; esi, edi, ebp and ebx are saved and restored.  eax, ecx, edx and the
; flags are overwritten.
; ---------------------------------------------------------------------
17 global _bn_mul_comba8 | |
18 align 16 | |
19 _bn_mul_comba8: | |
20 L$_bn_mul_comba8_begin: | |
21 push esi | |
; One register pushed so far, so [12+esp] is the 2nd stack argument (a).
22 mov esi,DWORD [12+esp] | |
23 push edi | |
; Two registers pushed, so [20+esp] is the 3rd stack argument (b).
24 mov edi,DWORD [20+esp] | |
25 push ebp | |
26 push ebx | |
; From here on four registers are pushed and [20+esp] is the 1st
; stack argument (r); it is reloaded into eax before every store.
27 xor ebx,ebx | |
28 mov eax,DWORD [esi] | |
29 xor ecx,ecx | |
30 mov edx,DWORD [edi] | |
31 ; ################## Calculate word 0 | |
32 xor ebp,ebp | |
33 ; mul a[0]*b[0] | |
34 mul edx | |
35 add ebx,eax | |
36 mov eax,DWORD [20+esp] | |
37 adc ecx,edx | |
38 mov edx,DWORD [edi] | |
39 adc ebp,0 | |
40 mov DWORD [eax],ebx | |
41 mov eax,DWORD [4+esi] | |
42 ; saved r[0] | |
43 ; ################## Calculate word 1 | |
44 xor ebx,ebx | |
45 ; mul a[1]*b[0] | |
46 mul edx | |
47 add ecx,eax | |
48 mov eax,DWORD [esi] | |
49 adc ebp,edx | |
50 mov edx,DWORD [4+edi] | |
51 adc ebx,0 | |
52 ; mul a[0]*b[1] | |
53 mul edx | |
54 add ecx,eax | |
55 mov eax,DWORD [20+esp] | |
56 adc ebp,edx | |
57 mov edx,DWORD [edi] | |
58 adc ebx,0 | |
59 mov DWORD [4+eax],ecx | |
60 mov eax,DWORD [8+esi] | |
61 ; saved r[1] | |
62 ; ################## Calculate word 2 | |
63 xor ecx,ecx | |
64 ; mul a[2]*b[0] | |
65 mul edx | |
66 add ebp,eax | |
67 mov eax,DWORD [4+esi] | |
68 adc ebx,edx | |
69 mov edx,DWORD [4+edi] | |
70 adc ecx,0 | |
71 ; mul a[1]*b[1] | |
72 mul edx | |
73 add ebp,eax | |
74 mov eax,DWORD [esi] | |
75 adc ebx,edx | |
76 mov edx,DWORD [8+edi] | |
77 adc ecx,0 | |
78 ; mul a[0]*b[2] | |
79 mul edx | |
80 add ebp,eax | |
81 mov eax,DWORD [20+esp] | |
82 adc ebx,edx | |
83 mov edx,DWORD [edi] | |
84 adc ecx,0 | |
85 mov DWORD [8+eax],ebp | |
86 mov eax,DWORD [12+esi] | |
87 ; saved r[2] | |
88 ; ################## Calculate word 3 | |
89 xor ebp,ebp | |
90 ; mul a[3]*b[0] | |
91 mul edx | |
92 add ebx,eax | |
93 mov eax,DWORD [8+esi] | |
94 adc ecx,edx | |
95 mov edx,DWORD [4+edi] | |
96 adc ebp,0 | |
97 ; mul a[2]*b[1] | |
98 mul edx | |
99 add ebx,eax | |
100 mov eax,DWORD [4+esi] | |
101 adc ecx,edx | |
102 mov edx,DWORD [8+edi] | |
103 adc ebp,0 | |
104 ; mul a[1]*b[2] | |
105 mul edx | |
106 add ebx,eax | |
107 mov eax,DWORD [esi] | |
108 adc ecx,edx | |
109 mov edx,DWORD [12+edi] | |
110 adc ebp,0 | |
111 ; mul a[0]*b[3] | |
112 mul edx | |
113 add ebx,eax | |
114 mov eax,DWORD [20+esp] | |
115 adc ecx,edx | |
116 mov edx,DWORD [edi] | |
117 adc ebp,0 | |
118 mov DWORD [12+eax],ebx | |
119 mov eax,DWORD [16+esi] | |
120 ; saved r[3] | |
121 ; ################## Calculate word 4 | |
122 xor ebx,ebx | |
123 ; mul a[4]*b[0] | |
124 mul edx | |
125 add ecx,eax | |
126 mov eax,DWORD [12+esi] | |
127 adc ebp,edx | |
128 mov edx,DWORD [4+edi] | |
129 adc ebx,0 | |
130 ; mul a[3]*b[1] | |
131 mul edx | |
132 add ecx,eax | |
133 mov eax,DWORD [8+esi] | |
134 adc ebp,edx | |
135 mov edx,DWORD [8+edi] | |
136 adc ebx,0 | |
137 ; mul a[2]*b[2] | |
138 mul edx | |
139 add ecx,eax | |
140 mov eax,DWORD [4+esi] | |
141 adc ebp,edx | |
142 mov edx,DWORD [12+edi] | |
143 adc ebx,0 | |
144 ; mul a[1]*b[3] | |
145 mul edx | |
146 add ecx,eax | |
147 mov eax,DWORD [esi] | |
148 adc ebp,edx | |
149 mov edx,DWORD [16+edi] | |
150 adc ebx,0 | |
151 ; mul a[0]*b[4] | |
152 mul edx | |
153 add ecx,eax | |
154 mov eax,DWORD [20+esp] | |
155 adc ebp,edx | |
156 mov edx,DWORD [edi] | |
157 adc ebx,0 | |
158 mov DWORD [16+eax],ecx | |
159 mov eax,DWORD [20+esi] | |
160 ; saved r[4] | |
161 ; ################## Calculate word 5 | |
162 xor ecx,ecx | |
163 ; mul a[5]*b[0] | |
164 mul edx | |
165 add ebp,eax | |
166 mov eax,DWORD [16+esi] | |
167 adc ebx,edx | |
168 mov edx,DWORD [4+edi] | |
169 adc ecx,0 | |
170 ; mul a[4]*b[1] | |
171 mul edx | |
172 add ebp,eax | |
173 mov eax,DWORD [12+esi] | |
174 adc ebx,edx | |
175 mov edx,DWORD [8+edi] | |
176 adc ecx,0 | |
177 ; mul a[3]*b[2] | |
178 mul edx | |
179 add ebp,eax | |
180 mov eax,DWORD [8+esi] | |
181 adc ebx,edx | |
182 mov edx,DWORD [12+edi] | |
183 adc ecx,0 | |
184 ; mul a[2]*b[3] | |
185 mul edx | |
186 add ebp,eax | |
187 mov eax,DWORD [4+esi] | |
188 adc ebx,edx | |
189 mov edx,DWORD [16+edi] | |
190 adc ecx,0 | |
191 ; mul a[1]*b[4] | |
192 mul edx | |
193 add ebp,eax | |
194 mov eax,DWORD [esi] | |
195 adc ebx,edx | |
196 mov edx,DWORD [20+edi] | |
197 adc ecx,0 | |
198 ; mul a[0]*b[5] | |
199 mul edx | |
200 add ebp,eax | |
201 mov eax,DWORD [20+esp] | |
202 adc ebx,edx | |
203 mov edx,DWORD [edi] | |
204 adc ecx,0 | |
205 mov DWORD [20+eax],ebp | |
206 mov eax,DWORD [24+esi] | |
207 ; saved r[5] | |
208 ; ################## Calculate word 6 | |
209 xor ebp,ebp | |
210 ; mul a[6]*b[0] | |
211 mul edx | |
212 add ebx,eax | |
213 mov eax,DWORD [20+esi] | |
214 adc ecx,edx | |
215 mov edx,DWORD [4+edi] | |
216 adc ebp,0 | |
217 ; mul a[5]*b[1] | |
218 mul edx | |
219 add ebx,eax | |
220 mov eax,DWORD [16+esi] | |
221 adc ecx,edx | |
222 mov edx,DWORD [8+edi] | |
223 adc ebp,0 | |
224 ; mul a[4]*b[2] | |
225 mul edx | |
226 add ebx,eax | |
227 mov eax,DWORD [12+esi] | |
228 adc ecx,edx | |
229 mov edx,DWORD [12+edi] | |
230 adc ebp,0 | |
231 ; mul a[3]*b[3] | |
232 mul edx | |
233 add ebx,eax | |
234 mov eax,DWORD [8+esi] | |
235 adc ecx,edx | |
236 mov edx,DWORD [16+edi] | |
237 adc ebp,0 | |
238 ; mul a[2]*b[4] | |
239 mul edx | |
240 add ebx,eax | |
241 mov eax,DWORD [4+esi] | |
242 adc ecx,edx | |
243 mov edx,DWORD [20+edi] | |
244 adc ebp,0 | |
245 ; mul a[1]*b[5] | |
246 mul edx | |
247 add ebx,eax | |
248 mov eax,DWORD [esi] | |
249 adc ecx,edx | |
250 mov edx,DWORD [24+edi] | |
251 adc ebp,0 | |
252 ; mul a[0]*b[6] | |
253 mul edx | |
254 add ebx,eax | |
255 mov eax,DWORD [20+esp] | |
256 adc ecx,edx | |
257 mov edx,DWORD [edi] | |
258 adc ebp,0 | |
259 mov DWORD [24+eax],ebx | |
260 mov eax,DWORD [28+esi] | |
261 ; saved r[6] | |
262 ; ################## Calculate word 7 | |
263 xor ebx,ebx | |
264 ; mul a[7]*b[0] | |
265 mul edx | |
266 add ecx,eax | |
267 mov eax,DWORD [24+esi] | |
268 adc ebp,edx | |
269 mov edx,DWORD [4+edi] | |
270 adc ebx,0 | |
271 ; mul a[6]*b[1] | |
272 mul edx | |
273 add ecx,eax | |
274 mov eax,DWORD [20+esi] | |
275 adc ebp,edx | |
276 mov edx,DWORD [8+edi] | |
277 adc ebx,0 | |
278 ; mul a[5]*b[2] | |
279 mul edx | |
280 add ecx,eax | |
281 mov eax,DWORD [16+esi] | |
282 adc ebp,edx | |
283 mov edx,DWORD [12+edi] | |
284 adc ebx,0 | |
285 ; mul a[4]*b[3] | |
286 mul edx | |
287 add ecx,eax | |
288 mov eax,DWORD [12+esi] | |
289 adc ebp,edx | |
290 mov edx,DWORD [16+edi] | |
291 adc ebx,0 | |
292 ; mul a[3]*b[4] | |
293 mul edx | |
294 add ecx,eax | |
295 mov eax,DWORD [8+esi] | |
296 adc ebp,edx | |
297 mov edx,DWORD [20+edi] | |
298 adc ebx,0 | |
299 ; mul a[2]*b[5] | |
300 mul edx | |
301 add ecx,eax | |
302 mov eax,DWORD [4+esi] | |
303 adc ebp,edx | |
304 mov edx,DWORD [24+edi] | |
305 adc ebx,0 | |
306 ; mul a[1]*b[6] | |
307 mul edx | |
308 add ecx,eax | |
309 mov eax,DWORD [esi] | |
310 adc ebp,edx | |
311 mov edx,DWORD [28+edi] | |
312 adc ebx,0 | |
313 ; mul a[0]*b[7] | |
314 mul edx | |
315 add ecx,eax | |
316 mov eax,DWORD [20+esp] | |
317 adc ebp,edx | |
318 mov edx,DWORD [4+edi] | |
319 adc ebx,0 | |
320 mov DWORD [28+eax],ecx | |
321 mov eax,DWORD [28+esi] | |
322 ; saved r[7] | |
; From word 8 onward the first product of each word starts at a[7];
; a[0] no longer takes part.
323 ; ################## Calculate word 8 | |
324 xor ecx,ecx | |
325 ; mul a[7]*b[1] | |
326 mul edx | |
327 add ebp,eax | |
328 mov eax,DWORD [24+esi] | |
329 adc ebx,edx | |
330 mov edx,DWORD [8+edi] | |
331 adc ecx,0 | |
332 ; mul a[6]*b[2] | |
333 mul edx | |
334 add ebp,eax | |
335 mov eax,DWORD [20+esi] | |
336 adc ebx,edx | |
337 mov edx,DWORD [12+edi] | |
338 adc ecx,0 | |
339 ; mul a[5]*b[3] | |
340 mul edx | |
341 add ebp,eax | |
342 mov eax,DWORD [16+esi] | |
343 adc ebx,edx | |
344 mov edx,DWORD [16+edi] | |
345 adc ecx,0 | |
346 ; mul a[4]*b[4] | |
347 mul edx | |
348 add ebp,eax | |
349 mov eax,DWORD [12+esi] | |
350 adc ebx,edx | |
351 mov edx,DWORD [20+edi] | |
352 adc ecx,0 | |
353 ; mul a[3]*b[5] | |
354 mul edx | |
355 add ebp,eax | |
356 mov eax,DWORD [8+esi] | |
357 adc ebx,edx | |
358 mov edx,DWORD [24+edi] | |
359 adc ecx,0 | |
360 ; mul a[2]*b[6] | |
361 mul edx | |
362 add ebp,eax | |
363 mov eax,DWORD [4+esi] | |
364 adc ebx,edx | |
365 mov edx,DWORD [28+edi] | |
366 adc ecx,0 | |
367 ; mul a[1]*b[7] | |
368 mul edx | |
369 add ebp,eax | |
370 mov eax,DWORD [20+esp] | |
371 adc ebx,edx | |
372 mov edx,DWORD [8+edi] | |
373 adc ecx,0 | |
374 mov DWORD [32+eax],ebp | |
375 mov eax,DWORD [28+esi] | |
376 ; saved r[8] | |
377 ; ################## Calculate word 9 | |
378 xor ebp,ebp | |
379 ; mul a[7]*b[2] | |
380 mul edx | |
381 add ebx,eax | |
382 mov eax,DWORD [24+esi] | |
383 adc ecx,edx | |
384 mov edx,DWORD [12+edi] | |
385 adc ebp,0 | |
386 ; mul a[6]*b[3] | |
387 mul edx | |
388 add ebx,eax | |
389 mov eax,DWORD [20+esi] | |
390 adc ecx,edx | |
391 mov edx,DWORD [16+edi] | |
392 adc ebp,0 | |
393 ; mul a[5]*b[4] | |
394 mul edx | |
395 add ebx,eax | |
396 mov eax,DWORD [16+esi] | |
397 adc ecx,edx | |
398 mov edx,DWORD [20+edi] | |
399 adc ebp,0 | |
400 ; mul a[4]*b[5] | |
401 mul edx | |
402 add ebx,eax | |
403 mov eax,DWORD [12+esi] | |
404 adc ecx,edx | |
405 mov edx,DWORD [24+edi] | |
406 adc ebp,0 | |
407 ; mul a[3]*b[6] | |
408 mul edx | |
409 add ebx,eax | |
410 mov eax,DWORD [8+esi] | |
411 adc ecx,edx | |
412 mov edx,DWORD [28+edi] | |
413 adc ebp,0 | |
414 ; mul a[2]*b[7] | |
415 mul edx | |
416 add ebx,eax | |
417 mov eax,DWORD [20+esp] | |
418 adc ecx,edx | |
419 mov edx,DWORD [12+edi] | |
420 adc ebp,0 | |
421 mov DWORD [36+eax],ebx | |
422 mov eax,DWORD [28+esi] | |
423 ; saved r[9] | |
424 ; ################## Calculate word 10 | |
425 xor ebx,ebx | |
426 ; mul a[7]*b[3] | |
427 mul edx | |
428 add ecx,eax | |
429 mov eax,DWORD [24+esi] | |
430 adc ebp,edx | |
431 mov edx,DWORD [16+edi] | |
432 adc ebx,0 | |
433 ; mul a[6]*b[4] | |
434 mul edx | |
435 add ecx,eax | |
436 mov eax,DWORD [20+esi] | |
437 adc ebp,edx | |
438 mov edx,DWORD [20+edi] | |
439 adc ebx,0 | |
440 ; mul a[5]*b[5] | |
441 mul edx | |
442 add ecx,eax | |
443 mov eax,DWORD [16+esi] | |
444 adc ebp,edx | |
445 mov edx,DWORD [24+edi] | |
446 adc ebx,0 | |
447 ; mul a[4]*b[6] | |
448 mul edx | |
449 add ecx,eax | |
450 mov eax,DWORD [12+esi] | |
451 adc ebp,edx | |
452 mov edx,DWORD [28+edi] | |
453 adc ebx,0 | |
454 ; mul a[3]*b[7] | |
455 mul edx | |
456 add ecx,eax | |
457 mov eax,DWORD [20+esp] | |
458 adc ebp,edx | |
459 mov edx,DWORD [16+edi] | |
460 adc ebx,0 | |
461 mov DWORD [40+eax],ecx | |
462 mov eax,DWORD [28+esi] | |
463 ; saved r[10] | |
464 ; ################## Calculate word 11 | |
465 xor ecx,ecx | |
466 ; mul a[7]*b[4] | |
467 mul edx | |
468 add ebp,eax | |
469 mov eax,DWORD [24+esi] | |
470 adc ebx,edx | |
471 mov edx,DWORD [20+edi] | |
472 adc ecx,0 | |
473 ; mul a[6]*b[5] | |
474 mul edx | |
475 add ebp,eax | |
476 mov eax,DWORD [20+esi] | |
477 adc ebx,edx | |
478 mov edx,DWORD [24+edi] | |
479 adc ecx,0 | |
480 ; mul a[5]*b[6] | |
481 mul edx | |
482 add ebp,eax | |
483 mov eax,DWORD [16+esi] | |
484 adc ebx,edx | |
485 mov edx,DWORD [28+edi] | |
486 adc ecx,0 | |
487 ; mul a[4]*b[7] | |
488 mul edx | |
489 add ebp,eax | |
490 mov eax,DWORD [20+esp] | |
491 adc ebx,edx | |
492 mov edx,DWORD [20+edi] | |
493 adc ecx,0 | |
494 mov DWORD [44+eax],ebp | |
495 mov eax,DWORD [28+esi] | |
496 ; saved r[11] | |
497 ; ################## Calculate word 12 | |
498 xor ebp,ebp | |
499 ; mul a[7]*b[5] | |
500 mul edx | |
501 add ebx,eax | |
502 mov eax,DWORD [24+esi] | |
503 adc ecx,edx | |
504 mov edx,DWORD [24+edi] | |
505 adc ebp,0 | |
506 ; mul a[6]*b[6] | |
507 mul edx | |
508 add ebx,eax | |
509 mov eax,DWORD [20+esi] | |
510 adc ecx,edx | |
511 mov edx,DWORD [28+edi] | |
512 adc ebp,0 | |
513 ; mul a[5]*b[7] | |
514 mul edx | |
515 add ebx,eax | |
516 mov eax,DWORD [20+esp] | |
517 adc ecx,edx | |
518 mov edx,DWORD [24+edi] | |
519 adc ebp,0 | |
520 mov DWORD [48+eax],ebx | |
521 mov eax,DWORD [28+esi] | |
522 ; saved r[12] | |
523 ; ################## Calculate word 13 | |
524 xor ebx,ebx | |
525 ; mul a[7]*b[6] | |
526 mul edx | |
527 add ecx,eax | |
528 mov eax,DWORD [24+esi] | |
529 adc ebp,edx | |
530 mov edx,DWORD [28+edi] | |
531 adc ebx,0 | |
532 ; mul a[6]*b[7] | |
533 mul edx | |
534 add ecx,eax | |
535 mov eax,DWORD [20+esp] | |
536 adc ebp,edx | |
537 mov edx,DWORD [28+edi] | |
538 adc ebx,0 | |
539 mov DWORD [52+eax],ecx | |
540 mov eax,DWORD [28+esi] | |
541 ; saved r[13] | |
542 ; ################## Calculate word 14 | |
543 xor ecx,ecx | |
544 ; mul a[7]*b[7] | |
545 mul edx | |
546 add ebp,eax | |
547 mov eax,DWORD [20+esp] | |
548 adc ebx,edx | |
; The carry collected into ecx below is never stored: r[15] (ebx) is
; the last word written.
549 adc ecx,0 | |
550 mov DWORD [56+eax],ebp | |
551 ; saved r[14] | |
552 ; save r[15] | |
553 mov DWORD [60+eax],ebx | |
; Restore the saved registers in reverse push order.
554 pop ebx | |
555 pop ebp | |
556 pop edi | |
557 pop esi | |
558 ret | |
; ---------------------------------------------------------------------
; _bn_mul_comba4: r[0..7] = a[0..3] * b[0..3], using 32-bit words.
;
; Stack arguments (dwords, addressed relative to esp after the pushes):
;   1st -> r   destination; 8 words are written at offsets 0..28
;   2nd -> a   kept in esi
;   3rd -> b   kept in edi
;
; Same scheme as the 8-word routine: for each result word k the
; products a[i]*b[j] with i+j == k are added into a three-register
; accumulator (ebx, ecx, ebp in rotating roles) and the lowest register
; is stored as r[k].
;
; esi, edi, ebp and ebx are saved and restored.  eax, ecx, edx and the
; flags are overwritten.
; ---------------------------------------------------------------------
559 global _bn_mul_comba4 | |
560 align 16 | |
561 _bn_mul_comba4: | |
562 L$_bn_mul_comba4_begin: | |
563 push esi | |
; One register pushed so far, so [12+esp] is the 2nd stack argument (a).
564 mov esi,DWORD [12+esp] | |
565 push edi | |
; Two registers pushed, so [20+esp] is the 3rd stack argument (b).
566 mov edi,DWORD [20+esp] | |
567 push ebp | |
568 push ebx | |
; With four registers pushed, [20+esp] is the 1st stack argument (r).
569 xor ebx,ebx | |
570 mov eax,DWORD [esi] | |
571 xor ecx,ecx | |
572 mov edx,DWORD [edi] | |
573 ; ################## Calculate word 0 | |
574 xor ebp,ebp | |
575 ; mul a[0]*b[0] | |
576 mul edx | |
577 add ebx,eax | |
578 mov eax,DWORD [20+esp] | |
579 adc ecx,edx | |
580 mov edx,DWORD [edi] | |
581 adc ebp,0 | |
582 mov DWORD [eax],ebx | |
583 mov eax,DWORD [4+esi] | |
584 ; saved r[0] | |
585 ; ################## Calculate word 1 | |
586 xor ebx,ebx | |
587 ; mul a[1]*b[0] | |
588 mul edx | |
589 add ecx,eax | |
590 mov eax,DWORD [esi] | |
591 adc ebp,edx | |
592 mov edx,DWORD [4+edi] | |
593 adc ebx,0 | |
594 ; mul a[0]*b[1] | |
595 mul edx | |
596 add ecx,eax | |
597 mov eax,DWORD [20+esp] | |
598 adc ebp,edx | |
599 mov edx,DWORD [edi] | |
600 adc ebx,0 | |
601 mov DWORD [4+eax],ecx | |
602 mov eax,DWORD [8+esi] | |
603 ; saved r[1] | |
604 ; ################## Calculate word 2 | |
605 xor ecx,ecx | |
606 ; mul a[2]*b[0] | |
607 mul edx | |
608 add ebp,eax | |
609 mov eax,DWORD [4+esi] | |
610 adc ebx,edx | |
611 mov edx,DWORD [4+edi] | |
612 adc ecx,0 | |
613 ; mul a[1]*b[1] | |
614 mul edx | |
615 add ebp,eax | |
616 mov eax,DWORD [esi] | |
617 adc ebx,edx | |
618 mov edx,DWORD [8+edi] | |
619 adc ecx,0 | |
620 ; mul a[0]*b[2] | |
621 mul edx | |
622 add ebp,eax | |
623 mov eax,DWORD [20+esp] | |
624 adc ebx,edx | |
625 mov edx,DWORD [edi] | |
626 adc ecx,0 | |
627 mov DWORD [8+eax],ebp | |
628 mov eax,DWORD [12+esi] | |
629 ; saved r[2] | |
630 ; ################## Calculate word 3 | |
631 xor ebp,ebp | |
632 ; mul a[3]*b[0] | |
633 mul edx | |
634 add ebx,eax | |
635 mov eax,DWORD [8+esi] | |
636 adc ecx,edx | |
637 mov edx,DWORD [4+edi] | |
638 adc ebp,0 | |
639 ; mul a[2]*b[1] | |
640 mul edx | |
641 add ebx,eax | |
642 mov eax,DWORD [4+esi] | |
643 adc ecx,edx | |
644 mov edx,DWORD [8+edi] | |
645 adc ebp,0 | |
646 ; mul a[1]*b[2] | |
647 mul edx | |
648 add ebx,eax | |
649 mov eax,DWORD [esi] | |
650 adc ecx,edx | |
651 mov edx,DWORD [12+edi] | |
652 adc ebp,0 | |
653 ; mul a[0]*b[3] | |
654 mul edx | |
655 add ebx,eax | |
656 mov eax,DWORD [20+esp] | |
657 adc ecx,edx | |
658 mov edx,DWORD [4+edi] | |
659 adc ebp,0 | |
660 mov DWORD [12+eax],ebx | |
661 mov eax,DWORD [12+esi] | |
662 ; saved r[3] | |
; From word 4 onward the first product of each word starts at a[3];
; a[0] no longer takes part.
663 ; ################## Calculate word 4 | |
664 xor ebx,ebx | |
665 ; mul a[3]*b[1] | |
666 mul edx | |
667 add ecx,eax | |
668 mov eax,DWORD [8+esi] | |
669 adc ebp,edx | |
670 mov edx,DWORD [8+edi] | |
671 adc ebx,0 | |
672 ; mul a[2]*b[2] | |
673 mul edx | |
674 add ecx,eax | |
675 mov eax,DWORD [4+esi] | |
676 adc ebp,edx | |
677 mov edx,DWORD [12+edi] | |
678 adc ebx,0 | |
679 ; mul a[1]*b[3] | |
680 mul edx | |
681 add ecx,eax | |
682 mov eax,DWORD [20+esp] | |
683 adc ebp,edx | |
684 mov edx,DWORD [8+edi] | |
685 adc ebx,0 | |
686 mov DWORD [16+eax],ecx | |
687 mov eax,DWORD [12+esi] | |
688 ; saved r[4] | |
689 ; ################## Calculate word 5 | |
690 xor ecx,ecx | |
691 ; mul a[3]*b[2] | |
692 mul edx | |
693 add ebp,eax | |
694 mov eax,DWORD [8+esi] | |
695 adc ebx,edx | |
696 mov edx,DWORD [12+edi] | |
697 adc ecx,0 | |
698 ; mul a[2]*b[3] | |
699 mul edx | |
700 add ebp,eax | |
701 mov eax,DWORD [20+esp] | |
702 adc ebx,edx | |
703 mov edx,DWORD [12+edi] | |
704 adc ecx,0 | |
705 mov DWORD [20+eax],ebp | |
706 mov eax,DWORD [12+esi] | |
707 ; saved r[5] | |
708 ; ################## Calculate word 6 | |
709 xor ebp,ebp | |
710 ; mul a[3]*b[3] | |
711 mul edx | |
712 add ebx,eax | |
713 mov eax,DWORD [20+esp] | |
714 adc ecx,edx | |
; The carry collected into ebp below is never stored: r[7] (ecx) is
; the last word written.
715 adc ebp,0 | |
716 mov DWORD [24+eax],ebx | |
717 ; saved r[6] | |
718 ; save r[7] | |
719 mov DWORD [28+eax],ecx | |
; Restore the saved registers in reverse push order.
720 pop ebx | |
721 pop ebp | |
722 pop edi | |
723 pop esi | |
724 ret | |
; ---------------------------------------------------------------------
; _bn_sqr_comba8: r[0..15] = a[0..7] squared, using 32-bit words.
;
; Stack arguments (dwords, read after the four pushes below):
;   1st -> r   destination, kept in edi; 16 words written at 0..60
;   2nd -> a   kept in esi
;
; For each result word k the products a[i]*a[j] with i+j == k and
; i >= j are added into a three-register accumulator (ebx, ecx, ebp in
; rotating roles).  A cross term (i != j) is multiplied once and then
; doubled with add eax,eax / adc edx,edx; the bit shifted out of edx is
; added to the top accumulator register by the following adc ...,0.
; A diagonal term (i == j) uses mul eax and is added once.
;
; esi, edi, ebp and ebx are saved and restored.  eax, ecx, edx and the
; flags are overwritten.
; ---------------------------------------------------------------------
725 global _bn_sqr_comba8 | |
726 align 16 | |
727 _bn_sqr_comba8: | |
728 L$_bn_sqr_comba8_begin: | |
729 push esi | |
730 push edi | |
731 push ebp | |
732 push ebx | |
; Four registers pushed: [20+esp] is the 1st stack argument (r) and
; [24+esp] is the 2nd (a).
733 mov edi,DWORD [20+esp] | |
734 mov esi,DWORD [24+esp] | |
735 xor ebx,ebx | |
736 xor ecx,ecx | |
737 mov eax,DWORD [esi] | |
738 ; ############### Calculate word 0 | |
739 xor ebp,ebp | |
740 ; sqr a[0]*a[0] | |
741 mul eax | |
742 add ebx,eax | |
743 adc ecx,edx | |
744 mov edx,DWORD [esi] | |
745 adc ebp,0 | |
746 mov DWORD [edi],ebx | |
747 mov eax,DWORD [4+esi] | |
748 ; saved r[0] | |
749 ; ############### Calculate word 1 | |
750 xor ebx,ebx | |
751 ; sqr a[1]*a[0] | |
752 mul edx | |
; Cross term: double edx:eax; the carry out of edx goes into ebx.
753 add eax,eax | |
754 adc edx,edx | |
755 adc ebx,0 | |
756 add ecx,eax | |
757 adc ebp,edx | |
758 mov eax,DWORD [8+esi] | |
759 adc ebx,0 | |
760 mov DWORD [4+edi],ecx | |
761 mov edx,DWORD [esi] | |
762 ; saved r[1] | |
763 ; ############### Calculate word 2 | |
764 xor ecx,ecx | |
765 ; sqr a[2]*a[0] | |
766 mul edx | |
767 add eax,eax | |
768 adc edx,edx | |
769 adc ecx,0 | |
770 add ebp,eax | |
771 adc ebx,edx | |
772 mov eax,DWORD [4+esi] | |
773 adc ecx,0 | |
774 ; sqr a[1]*a[1] | |
775 mul eax | |
776 add ebp,eax | |
777 adc ebx,edx | |
778 mov edx,DWORD [esi] | |
779 adc ecx,0 | |
780 mov DWORD [8+edi],ebp | |
781 mov eax,DWORD [12+esi] | |
782 ; saved r[2] | |
783 ; ############### Calculate word 3 | |
784 xor ebp,ebp | |
785 ; sqr a[3]*a[0] | |
786 mul edx | |
787 add eax,eax | |
788 adc edx,edx | |
789 adc ebp,0 | |
790 add ebx,eax | |
791 adc ecx,edx | |
792 mov eax,DWORD [8+esi] | |
793 adc ebp,0 | |
794 mov edx,DWORD [4+esi] | |
795 ; sqr a[2]*a[1] | |
796 mul edx | |
797 add eax,eax | |
798 adc edx,edx | |
799 adc ebp,0 | |
800 add ebx,eax | |
801 adc ecx,edx | |
802 mov eax,DWORD [16+esi] | |
803 adc ebp,0 | |
804 mov DWORD [12+edi],ebx | |
805 mov edx,DWORD [esi] | |
806 ; saved r[3] | |
807 ; ############### Calculate word 4 | |
808 xor ebx,ebx | |
809 ; sqr a[4]*a[0] | |
810 mul edx | |
811 add eax,eax | |
812 adc edx,edx | |
813 adc ebx,0 | |
814 add ecx,eax | |
815 adc ebp,edx | |
816 mov eax,DWORD [12+esi] | |
817 adc ebx,0 | |
818 mov edx,DWORD [4+esi] | |
819 ; sqr a[3]*a[1] | |
820 mul edx | |
821 add eax,eax | |
822 adc edx,edx | |
823 adc ebx,0 | |
824 add ecx,eax | |
825 adc ebp,edx | |
826 mov eax,DWORD [8+esi] | |
827 adc ebx,0 | |
828 ; sqr a[2]*a[2] | |
829 mul eax | |
830 add ecx,eax | |
831 adc ebp,edx | |
832 mov edx,DWORD [esi] | |
833 adc ebx,0 | |
834 mov DWORD [16+edi],ecx | |
835 mov eax,DWORD [20+esi] | |
836 ; saved r[4] | |
837 ; ############### Calculate word 5 | |
838 xor ecx,ecx | |
839 ; sqr a[5]*a[0] | |
840 mul edx | |
841 add eax,eax | |
842 adc edx,edx | |
843 adc ecx,0 | |
844 add ebp,eax | |
845 adc ebx,edx | |
846 mov eax,DWORD [16+esi] | |
847 adc ecx,0 | |
848 mov edx,DWORD [4+esi] | |
849 ; sqr a[4]*a[1] | |
850 mul edx | |
851 add eax,eax | |
852 adc edx,edx | |
853 adc ecx,0 | |
854 add ebp,eax | |
855 adc ebx,edx | |
856 mov eax,DWORD [12+esi] | |
857 adc ecx,0 | |
858 mov edx,DWORD [8+esi] | |
859 ; sqr a[3]*a[2] | |
860 mul edx | |
861 add eax,eax | |
862 adc edx,edx | |
863 adc ecx,0 | |
864 add ebp,eax | |
865 adc ebx,edx | |
866 mov eax,DWORD [24+esi] | |
867 adc ecx,0 | |
868 mov DWORD [20+edi],ebp | |
869 mov edx,DWORD [esi] | |
870 ; saved r[5] | |
871 ; ############### Calculate word 6 | |
872 xor ebp,ebp | |
873 ; sqr a[6]*a[0] | |
874 mul edx | |
875 add eax,eax | |
876 adc edx,edx | |
877 adc ebp,0 | |
878 add ebx,eax | |
879 adc ecx,edx | |
880 mov eax,DWORD [20+esi] | |
881 adc ebp,0 | |
882 mov edx,DWORD [4+esi] | |
883 ; sqr a[5]*a[1] | |
884 mul edx | |
885 add eax,eax | |
886 adc edx,edx | |
887 adc ebp,0 | |
888 add ebx,eax | |
889 adc ecx,edx | |
890 mov eax,DWORD [16+esi] | |
891 adc ebp,0 | |
892 mov edx,DWORD [8+esi] | |
893 ; sqr a[4]*a[2] | |
894 mul edx | |
895 add eax,eax | |
896 adc edx,edx | |
897 adc ebp,0 | |
898 add ebx,eax | |
899 adc ecx,edx | |
900 mov eax,DWORD [12+esi] | |
901 adc ebp,0 | |
902 ; sqr a[3]*a[3] | |
903 mul eax | |
904 add ebx,eax | |
905 adc ecx,edx | |
906 mov edx,DWORD [esi] | |
907 adc ebp,0 | |
908 mov DWORD [24+edi],ebx | |
909 mov eax,DWORD [28+esi] | |
910 ; saved r[6] | |
911 ; ############### Calculate word 7 | |
912 xor ebx,ebx | |
913 ; sqr a[7]*a[0] | |
914 mul edx | |
915 add eax,eax | |
916 adc edx,edx | |
917 adc ebx,0 | |
918 add ecx,eax | |
919 adc ebp,edx | |
920 mov eax,DWORD [24+esi] | |
921 adc ebx,0 | |
922 mov edx,DWORD [4+esi] | |
923 ; sqr a[6]*a[1] | |
924 mul edx | |
925 add eax,eax | |
926 adc edx,edx | |
927 adc ebx,0 | |
928 add ecx,eax | |
929 adc ebp,edx | |
930 mov eax,DWORD [20+esi] | |
931 adc ebx,0 | |
932 mov edx,DWORD [8+esi] | |
933 ; sqr a[5]*a[2] | |
934 mul edx | |
935 add eax,eax | |
936 adc edx,edx | |
937 adc ebx,0 | |
938 add ecx,eax | |
939 adc ebp,edx | |
940 mov eax,DWORD [16+esi] | |
941 adc ebx,0 | |
942 mov edx,DWORD [12+esi] | |
943 ; sqr a[4]*a[3] | |
944 mul edx | |
945 add eax,eax | |
946 adc edx,edx | |
947 adc ebx,0 | |
948 add ecx,eax | |
949 adc ebp,edx | |
950 mov eax,DWORD [28+esi] | |
951 adc ebx,0 | |
952 mov DWORD [28+edi],ecx | |
953 mov edx,DWORD [4+esi] | |
954 ; saved r[7] | |
955 ; ############### Calculate word 8 | |
956 xor ecx,ecx | |
957 ; sqr a[7]*a[1] | |
958 mul edx | |
959 add eax,eax | |
960 adc edx,edx | |
961 adc ecx,0 | |
962 add ebp,eax | |
963 adc ebx,edx | |
964 mov eax,DWORD [24+esi] | |
965 adc ecx,0 | |
966 mov edx,DWORD [8+esi] | |
967 ; sqr a[6]*a[2] | |
968 mul edx | |
969 add eax,eax | |
970 adc edx,edx | |
971 adc ecx,0 | |
972 add ebp,eax | |
973 adc ebx,edx | |
974 mov eax,DWORD [20+esi] | |
975 adc ecx,0 | |
976 mov edx,DWORD [12+esi] | |
977 ; sqr a[5]*a[3] | |
978 mul edx | |
979 add eax,eax | |
980 adc edx,edx | |
981 adc ecx,0 | |
982 add ebp,eax | |
983 adc ebx,edx | |
984 mov eax,DWORD [16+esi] | |
985 adc ecx,0 | |
986 ; sqr a[4]*a[4] | |
987 mul eax | |
988 add ebp,eax | |
989 adc ebx,edx | |
990 mov edx,DWORD [8+esi] | |
991 adc ecx,0 | |
992 mov DWORD [32+edi],ebp | |
993 mov eax,DWORD [28+esi] | |
994 ; saved r[8] | |
995 ; ############### Calculate word 9 | |
996 xor ebp,ebp | |
997 ; sqr a[7]*a[2] | |
998 mul edx | |
999 add eax,eax | |
1000 adc edx,edx | |
1001 adc ebp,0 | |
1002 add ebx,eax | |
1003 adc ecx,edx | |
1004 mov eax,DWORD [24+esi] | |
1005 adc ebp,0 | |
1006 mov edx,DWORD [12+esi] | |
1007 ; sqr a[6]*a[3] | |
1008 mul edx | |
1009 add eax,eax | |
1010 adc edx,edx | |
1011 adc ebp,0 | |
1012 add ebx,eax | |
1013 adc ecx,edx | |
1014 mov eax,DWORD [20+esi] | |
1015 adc ebp,0 | |
1016 mov edx,DWORD [16+esi] | |
1017 ; sqr a[5]*a[4] | |
1018 mul edx | |
1019 add eax,eax | |
1020 adc edx,edx | |
1021 adc ebp,0 | |
1022 add ebx,eax | |
1023 adc ecx,edx | |
1024 mov eax,DWORD [28+esi] | |
1025 adc ebp,0 | |
1026 mov DWORD [36+edi],ebx | |
1027 mov edx,DWORD [12+esi] | |
1028 ; saved r[9] | |
1029 ; ############### Calculate word 10 | |
1030 xor ebx,ebx | |
1031 ; sqr a[7]*a[3] | |
1032 mul edx | |
1033 add eax,eax | |
1034 adc edx,edx | |
1035 adc ebx,0 | |
1036 add ecx,eax | |
1037 adc ebp,edx | |
1038 mov eax,DWORD [24+esi] | |
1039 adc ebx,0 | |
1040 mov edx,DWORD [16+esi] | |
1041 ; sqr a[6]*a[4] | |
1042 mul edx | |
1043 add eax,eax | |
1044 adc edx,edx | |
1045 adc ebx,0 | |
1046 add ecx,eax | |
1047 adc ebp,edx | |
1048 mov eax,DWORD [20+esi] | |
1049 adc ebx,0 | |
1050 ; sqr a[5]*a[5] | |
1051 mul eax | |
1052 add ecx,eax | |
1053 adc ebp,edx | |
1054 mov edx,DWORD [16+esi] | |
1055 adc ebx,0 | |
1056 mov DWORD [40+edi],ecx | |
1057 mov eax,DWORD [28+esi] | |
1058 ; saved r[10] | |
1059 ; ############### Calculate word 11 | |
1060 xor ecx,ecx | |
1061 ; sqr a[7]*a[4] | |
1062 mul edx | |
1063 add eax,eax | |
1064 adc edx,edx | |
1065 adc ecx,0 | |
1066 add ebp,eax | |
1067 adc ebx,edx | |
1068 mov eax,DWORD [24+esi] | |
1069 adc ecx,0 | |
1070 mov edx,DWORD [20+esi] | |
1071 ; sqr a[6]*a[5] | |
1072 mul edx | |
1073 add eax,eax | |
1074 adc edx,edx | |
1075 adc ecx,0 | |
1076 add ebp,eax | |
1077 adc ebx,edx | |
1078 mov eax,DWORD [28+esi] | |
1079 adc ecx,0 | |
1080 mov DWORD [44+edi],ebp | |
1081 mov edx,DWORD [20+esi] | |
1082 ; saved r[11] | |
1083 ; ############### Calculate word 12 | |
1084 xor ebp,ebp | |
1085 ; sqr a[7]*a[5] | |
1086 mul edx | |
1087 add eax,eax | |
1088 adc edx,edx | |
1089 adc ebp,0 | |
1090 add ebx,eax | |
1091 adc ecx,edx | |
1092 mov eax,DWORD [24+esi] | |
1093 adc ebp,0 | |
1094 ; sqr a[6]*a[6] | |
1095 mul eax | |
1096 add ebx,eax | |
1097 adc ecx,edx | |
1098 mov edx,DWORD [24+esi] | |
1099 adc ebp,0 | |
1100 mov DWORD [48+edi],ebx | |
1101 mov eax,DWORD [28+esi] | |
1102 ; saved r[12] | |
1103 ; ############### Calculate word 13 | |
1104 xor ebx,ebx | |
1105 ; sqr a[7]*a[6] | |
1106 mul edx | |
1107 add eax,eax | |
1108 adc edx,edx | |
1109 adc ebx,0 | |
1110 add ecx,eax | |
1111 adc ebp,edx | |
1112 mov eax,DWORD [28+esi] | |
1113 adc ebx,0 | |
1114 mov DWORD [52+edi],ecx | |
1115 ; saved r[13] | |
1116 ; ############### Calculate word 14 | |
1117 xor ecx,ecx | |
1118 ; sqr a[7]*a[7] | |
1119 mul eax | |
1120 add ebp,eax | |
1121 adc ebx,edx | |
; The carry collected into ecx below is never stored: r[15] (ebx) is
; the last word written.
1122 adc ecx,0 | |
1123 mov DWORD [56+edi],ebp | |
1124 ; saved r[14] | |
; r[15]: the remaining high word of the accumulator.
1125 mov DWORD [60+edi],ebx | |
; Restore the saved registers in reverse push order.
1126 pop ebx | |
1127 pop ebp | |
1128 pop edi | |
1129 pop esi | |
1130 ret | |
; ---------------------------------------------------------------------
; _bn_sqr_comba4: r[0..7] = a[0..3] squared, using 32-bit words.
;
; Stack arguments (dwords, read after the four pushes below):
;   1st -> r   destination, kept in edi; 8 words written at 0..28
;   2nd -> a   kept in esi
;
; Same scheme as the 8-word squaring routine: cross terms a[i]*a[j]
; (i != j) are multiplied once and doubled with add eax,eax /
; adc edx,edx / adc <top>,0; diagonal terms use mul eax and are added
; once.  ebx, ecx and ebp form the rotating three-register accumulator.
;
; esi, edi, ebp and ebx are saved and restored.  eax, ecx, edx and the
; flags are overwritten.
; ---------------------------------------------------------------------
1131 global _bn_sqr_comba4 | |
1132 align 16 | |
1133 _bn_sqr_comba4: | |
1134 L$_bn_sqr_comba4_begin: | |
1135 push esi | |
1136 push edi | |
1137 push ebp | |
1138 push ebx | |
; Four registers pushed: [20+esp] is the 1st stack argument (r) and
; [24+esp] is the 2nd (a).
1139 mov edi,DWORD [20+esp] | |
1140 mov esi,DWORD [24+esp] | |
1141 xor ebx,ebx | |
1142 xor ecx,ecx | |
1143 mov eax,DWORD [esi] | |
1144 ; ############### Calculate word 0 | |
1145 xor ebp,ebp | |
1146 ; sqr a[0]*a[0] | |
1147 mul eax | |
1148 add ebx,eax | |
1149 adc ecx,edx | |
1150 mov edx,DWORD [esi] | |
1151 adc ebp,0 | |
1152 mov DWORD [edi],ebx | |
1153 mov eax,DWORD [4+esi] | |
1154 ; saved r[0] | |
1155 ; ############### Calculate word 1 | |
1156 xor ebx,ebx | |
1157 ; sqr a[1]*a[0] | |
1158 mul edx | |
; Cross term: double edx:eax; the carry out of edx goes into ebx.
1159 add eax,eax | |
1160 adc edx,edx | |
1161 adc ebx,0 | |
1162 add ecx,eax | |
1163 adc ebp,edx | |
1164 mov eax,DWORD [8+esi] | |
1165 adc ebx,0 | |
1166 mov DWORD [4+edi],ecx | |
1167 mov edx,DWORD [esi] | |
1168 ; saved r[1] | |
1169 ; ############### Calculate word 2 | |
1170 xor ecx,ecx | |
1171 ; sqr a[2]*a[0] | |
1172 mul edx | |
1173 add eax,eax | |
1174 adc edx,edx | |
1175 adc ecx,0 | |
1176 add ebp,eax | |
1177 adc ebx,edx | |
1178 mov eax,DWORD [4+esi] | |
1179 adc ecx,0 | |
1180 ; sqr a[1]*a[1] | |
1181 mul eax | |
1182 add ebp,eax | |
1183 adc ebx,edx | |
1184 mov edx,DWORD [esi] | |
1185 adc ecx,0 | |
1186 mov DWORD [8+edi],ebp | |
1187 mov eax,DWORD [12+esi] | |
1188 ; saved r[2] | |
1189 ; ############### Calculate word 3 | |
1190 xor ebp,ebp | |
1191 ; sqr a[3]*a[0] | |
1192 mul edx | |
1193 add eax,eax | |
1194 adc edx,edx | |
1195 adc ebp,0 | |
1196 add ebx,eax | |
1197 adc ecx,edx | |
1198 mov eax,DWORD [8+esi] | |
1199 adc ebp,0 | |
1200 mov edx,DWORD [4+esi] | |
1201 ; sqr a[2]*a[1] | |
1202 mul edx | |
1203 add eax,eax | |
1204 adc edx,edx | |
1205 adc ebp,0 | |
1206 add ebx,eax | |
1207 adc ecx,edx | |
1208 mov eax,DWORD [12+esi] | |
1209 adc ebp,0 | |
1210 mov DWORD [12+edi],ebx | |
1211 mov edx,DWORD [4+esi] | |
1212 ; saved r[3] | |
1213 ; ############### Calculate word 4 | |
1214 xor ebx,ebx | |
1215 ; sqr a[3]*a[1] | |
1216 mul edx | |
1217 add eax,eax | |
1218 adc edx,edx | |
1219 adc ebx,0 | |
1220 add ecx,eax | |
1221 adc ebp,edx | |
1222 mov eax,DWORD [8+esi] | |
1223 adc ebx,0 | |
1224 ; sqr a[2]*a[2] | |
1225 mul eax | |
1226 add ecx,eax | |
1227 adc ebp,edx | |
1228 mov edx,DWORD [8+esi] | |
1229 adc ebx,0 | |
1230 mov DWORD [16+edi],ecx | |
1231 mov eax,DWORD [12+esi] | |
1232 ; saved r[4] | |
1233 ; ############### Calculate word 5 | |
1234 xor ecx,ecx | |
1235 ; sqr a[3]*a[2] | |
1236 mul edx | |
1237 add eax,eax | |
1238 adc edx,edx | |
1239 adc ecx,0 | |
1240 add ebp,eax | |
1241 adc ebx,edx | |
1242 mov eax,DWORD [12+esi] | |
1243 adc ecx,0 | |
1244 mov DWORD [20+edi],ebp | |
1245 ; saved r[5] | |
1246 ; ############### Calculate word 6 | |
1247 xor ebp,ebp | |
1248 ; sqr a[3]*a[3] | |
1249 mul eax | |
1250 add ebx,eax | |
1251 adc ecx,edx | |
; The carry collected into ebp below is never stored: r[7] (ecx) is
; the last word written.
1252 adc ebp,0 | |
1253 mov DWORD [24+edi],ebx | |
1254 ; saved r[6] | |
; r[7]: the remaining high word of the accumulator.
1255 mov DWORD [28+edi],ecx | |
; Restore the saved registers in reverse push order.
1256 pop ebx | |
1257 pop ebp | |
1258 pop edi | |
1259 pop esi | |
1260 ret | |
OLD | NEW |