OLD | NEW |
| (Empty) |
1 #if defined(__i386__) | |
/*
 * bn_mul_mont -- word-serial multiply-and-reduce on 32-bit words (i386,
 * AT&T syntax).  The embedded ident string at the end of this block reads
 * "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>".
 *
 * Arguments are read from the caller's stack.  After the four register
 * pushes below, argument k lives at 20+4*k(%esp):
 *   arg0  pointer the result words are written through      ("r")
 *   arg1  pointer to the first multiplicand words           ("a")
 *   arg2  pointer to the second multiplicand words          ("b")
 *   arg3  pointer to the words subtracted in the final step ("n")
 *   arg4  pointer to a word; only its first dword is used   ("n0")
 *   arg5  signed word count                                 ("num")
 * The short names r/a/b/n/n0/num are shorthand used by these comments only;
 * presumably they match the C prototype -- TODO confirm against the header.
 *
 * Returns (in %eax): 0 if num < 4 (nothing is read or written in that case),
 * otherwise 1.  %ebp, %ebx, %esi, %edi are saved on entry and restored on
 * exit.
 *
 * Private frame, relative to the re-pointed %esp:
 *    0(%esp)  num-1                      (squaring path only)
 *    4(%esp)  r
 *    8(%esp)  a
 *   12(%esp)  b; later the current b pointer (multiply path) or the outer
 *             index i (squaring path)
 *   16(%esp)  n
 *   20(%esp)  n0 value (first dword behind arg4)
 *   24(%esp)  caller-side %esp, reloaded before returning
 *   28(%esp)  &b[num], end marker        (integer multiply path only)
 *   32(%esp)  tp[0..num+1], the temporary vector; "tp[k]" below means the
 *             dword at 32+4*k(%esp)
 *
 * Three computational paths exist: an MMX/pmuludq path, an integer multiply
 * path, and an integer squaring path taken when a == b and num is even.  All
 * of them finish in .L006common_tail.
 */
2 .file "src/crypto/bn/asm/x86-mont.S" | |
3 .text | |
4 .globl bn_mul_mont | |
5 .hidden bn_mul_mont | |
6 .type bn_mul_mont,@function | |
7 .align 16 | |
8 bn_mul_mont: | |
9 .L_bn_mul_mont_begin: | |
/* Save the registers restored at .L000just_leave. */
10 pushl %ebp | |
11 pushl %ebx | |
12 pushl %esi | |
13 pushl %edi | |
/* eax = 0 is the return value if we bail out; edi = num (arg5). */
14 xorl %eax,%eax | |
15 movl 40(%esp),%edi | |
/* Signed compare: num < 4 leaves immediately with eax == 0. */
16 cmpl $4,%edi | |
17 jl .L000just_leave | |
/* esi = address of the arg0 slot, edx = address of the arg1 slot,
   ebp = current %esp (kept so it can be restored at the end). */
18 leal 20(%esp),%esi | |
19 leal 24(%esp),%edx | |
20 movl %esp,%ebp | |
/* Reserve 32 + 4*(num+2) bytes: edi is temporarily -(num+2), then turned
   back into num+2.
   NOTE(review): %esp is lowered in one step (plus the adjustments below)
   with no page-by-page probing visible here -- confirm this is acceptable
   for the largest num callers may pass. */
21 addl $2,%edi | |
22 negl %edi | |
23 leal -32(%esp,%edi,4),%esp | |
24 negl %edi | |
/* Lower %esp until it equals the arg1 slot address modulo 2048. */
25 movl %esp,%eax | |
26 subl %edx,%eax | |
27 andl $2047,%eax | |
28 subl %eax,%esp | |
/* If bit 11 of %esp and of the arg1 slot address agree, lower %esp by a
   further 2048 so that they differ (presumably to keep the frame and the
   caller's data from aliasing in the cache -- TODO confirm intent). */
29 xorl %esp,%edx | |
30 andl $2048,%edx | |
31 xorl $2048,%edx | |
32 subl %edx,%esp | |
/* Round %esp down to a 64-byte boundary. */
33 andl $-64,%esp | |
/* Fetch arg0..arg3 and the dword that arg4 points to ... */
34 movl (%esi),%eax | |
35 movl 4(%esi),%ebx | |
36 movl 8(%esi),%ecx | |
37 movl 12(%esi),%edx | |
38 movl 16(%esi),%esi | |
39 movl (%esi),%esi | |
/* ... and park them in the private frame: r, a, b, n, n0 value. */
40 movl %eax,4(%esp) | |
41 movl %ebx,8(%esp) | |
42 movl %ecx,12(%esp) | |
43 movl %edx,16(%esp) | |
44 movl %esi,20(%esp) | |
/* ebx = (num+2) - 3 = num-1; 24(%esp) = caller-side %esp. */
45 leal -3(%edi),%ebx | |
46 movl %ebp,24(%esp) | |
/* Position-independent address of OPENSSL_ia32cap_P via call/pop, then test
   bit 26 of its first dword.  When the bit is clear the integer code at
   .L002non_sse2 is used; otherwise fall through to the pmuludq code. */
47 call .L001PIC_me_up | |
48 .L001PIC_me_up: | |
49 popl %eax | |
50 leal OPENSSL_ia32cap_P-.L001PIC_me_up(%eax),%eax | |
51 btl $26,(%eax) | |
52 jnc .L002non_sse2 | |
/*
 * ---- MMX-register path -------------------------------------------------
 * mm7 = 0x00000000ffffffff mask, esi = a, edi = b, ebp = n,
 * edx = outer index i (starts at 0), ecx = inner index j.
 * pmuludq multiplies the low dwords of its operands into a 64-bit product,
 * so "pmuludq 20(%esp)" uses only the n0 value even though it reads 8 bytes.
 */
53 movl $-1,%eax | |
54 movd %eax,%mm7 | |
55 movl 8(%esp),%esi | |
56 movl 12(%esp),%edi | |
57 movl 16(%esp),%ebp | |
58 xorl %edx,%edx | |
59 xorl %ecx,%ecx | |
/* First pass (i = 0): mm4 = b[0], mm5 = a[0]*b[0], mm3 = n[0]. */
60 movd (%edi),%mm4 | |
61 movd (%esi),%mm5 | |
62 movd (%ebp),%mm3 | |
63 pmuludq %mm4,%mm5 | |
64 movq %mm5,%mm2 | |
65 movq %mm5,%mm0 | |
66 pand %mm7,%mm0 | |
/* mm5 = low32(a[0]*b[0]) * n0: the per-pass multiplier for n.
   mm3 = n[0]*mm5 + low32(a[0]*b[0]); only its upper half is carried on. */
67 pmuludq 20(%esp),%mm5 | |
68 pmuludq %mm5,%mm3 | |
69 paddq %mm0,%mm3 | |
/* Preload n[1] and a[1]; reduce both accumulators to their carries. */
70 movd 4(%ebp),%mm1 | |
71 movd 4(%esi),%mm0 | |
72 psrlq $32,%mm2 | |
73 psrlq $32,%mm3 | |
74 incl %ecx | |
75 .align 16 | |
/* For j = 1 .. num-2: mm2 accumulates a[j]*b[0], mm3 accumulates n[j]*mm5
   plus the low word of mm2; the low word of mm3 is stored to tp[j-1] and
   both accumulators keep their upper halves as carries. */
76 .L0031st: | |
77 pmuludq %mm4,%mm0 | |
78 pmuludq %mm5,%mm1 | |
79 paddq %mm0,%mm2 | |
80 paddq %mm1,%mm3 | |
81 movq %mm2,%mm0 | |
82 pand %mm7,%mm0 | |
83 movd 4(%ebp,%ecx,4),%mm1 | |
84 paddq %mm0,%mm3 | |
85 movd 4(%esi,%ecx,4),%mm0 | |
86 psrlq $32,%mm2 | |
87 movd %mm3,28(%esp,%ecx,4) | |
88 psrlq $32,%mm3 | |
89 leal 1(%ecx),%ecx | |
90 cmpl %ebx,%ecx | |
91 jl .L0031st | |
/* Last column (j = num-1): same step without preloading further words. */
92 pmuludq %mm4,%mm0 | |
93 pmuludq %mm5,%mm1 | |
94 paddq %mm0,%mm2 | |
95 paddq %mm1,%mm3 | |
96 movq %mm2,%mm0 | |
97 pand %mm7,%mm0 | |
98 paddq %mm0,%mm3 | |
99 movd %mm3,28(%esp,%ecx,4) | |
100 psrlq $32,%mm2 | |
101 psrlq $32,%mm3 | |
/* Sum of both carries is written as a 64-bit value over tp[num-1], tp[num]. */
102 paddq %mm2,%mm3 | |
103 movq %mm3,32(%esp,%ebx,4) | |
104 incl %edx | |
/* Outer loop, i = 1 .. num-1: like the first pass, but every column also
   adds the word already in tp[] (loaded through mm6). */
105 .L004outer: | |
106 xorl %ecx,%ecx | |
107 movd (%edi,%edx,4),%mm4 | |
108 movd (%esi),%mm5 | |
109 movd 32(%esp),%mm6 | |
110 movd (%ebp),%mm3 | |
111 pmuludq %mm4,%mm5 | |
112 paddq %mm6,%mm5 | |
113 movq %mm5,%mm0 | |
114 movq %mm5,%mm2 | |
115 pand %mm7,%mm0 | |
/* mm5 = low32(a[0]*b[i] + tp[0]) * n0, the multiplier for this pass. */
116 pmuludq 20(%esp),%mm5 | |
117 pmuludq %mm5,%mm3 | |
118 paddq %mm0,%mm3 | |
119 movd 36(%esp),%mm6 | |
120 movd 4(%ebp),%mm1 | |
121 movd 4(%esi),%mm0 | |
122 psrlq $32,%mm2 | |
123 psrlq $32,%mm3 | |
124 paddq %mm6,%mm2 | |
125 incl %ecx | |
/* ebx doubles as the inner down-counter (num-2 iterations); it is rebuilt
   from ecx once the inner loop has finished. */
126 decl %ebx | |
127 .L005inner: | |
128 pmuludq %mm4,%mm0 | |
129 pmuludq %mm5,%mm1 | |
130 paddq %mm0,%mm2 | |
131 paddq %mm1,%mm3 | |
132 movq %mm2,%mm0 | |
133 movd 36(%esp,%ecx,4),%mm6 | |
134 pand %mm7,%mm0 | |
135 movd 4(%ebp,%ecx,4),%mm1 | |
136 paddq %mm0,%mm3 | |
137 movd 4(%esi,%ecx,4),%mm0 | |
138 psrlq $32,%mm2 | |
139 movd %mm3,28(%esp,%ecx,4) | |
140 psrlq $32,%mm3 | |
141 paddq %mm6,%mm2 | |
/* leal leaves the flags from decl untouched, so jnz tests the counter. */
142 decl %ebx | |
143 leal 1(%ecx),%ecx | |
144 jnz .L005inner | |
/* ecx is num-1 here; restore ebx to that value. */
145 movl %ecx,%ebx | |
146 pmuludq %mm4,%mm0 | |
147 pmuludq %mm5,%mm1 | |
148 paddq %mm0,%mm2 | |
149 paddq %mm1,%mm3 | |
150 movq %mm2,%mm0 | |
151 pand %mm7,%mm0 | |
152 paddq %mm0,%mm3 | |
153 movd %mm3,28(%esp,%ecx,4) | |
154 psrlq $32,%mm2 | |
155 psrlq $32,%mm3 | |
/* Fold both carries and the old tp[num] into tp[num-1], tp[num]. */
156 movd 36(%esp,%ebx,4),%mm6 | |
157 paddq %mm2,%mm3 | |
158 paddq %mm6,%mm3 | |
159 movq %mm3,32(%esp,%ebx,4) | |
/* Next i; continue while i <= num-1. */
160 leal 1(%edx),%edx | |
161 cmpl %ebx,%edx | |
162 jle .L004outer | |
/* Clear the MMX state before going back to integer code. */
163 emms | |
164 jmp .L006common_tail | |
165 .align 16 | |
/*
 * ---- Integer path ------------------------------------------------------
 * esi = a, edi = b, eax = &b[num].  ebp = (num & 1) | (a - b): it is zero
 * only when a and b are the same pointer and num is even, in which case the
 * squaring code is used.  The movl that loads b[0] into edi does not change
 * the flags set by orl.
 */
166 .L002non_sse2: | |
167 movl 8(%esp),%esi | |
168 leal 1(%ebx),%ebp | |
169 movl 12(%esp),%edi | |
170 xorl %ecx,%ecx | |
171 movl %esi,%edx | |
172 andl $1,%ebp | |
173 subl %edi,%edx | |
174 leal 4(%edi,%ebx,4),%eax | |
175 orl %edx,%ebp | |
176 movl (%edi),%edi | |
177 jz .L007bn_sqr_mont | |
/* General multiply: remember &b[num] as the outer-loop end marker. */
178 movl %eax,28(%esp) | |
179 movl (%esi),%eax | |
180 xorl %edx,%edx | |
181 .align 16 | |
/* tp[j] = low(a[j]*b[0] + carry) for j = 0 .. num-2; edx carries the high
   word into the next column. */
182 .L008mull: | |
183 movl %edx,%ebp | |
184 mull %edi | |
185 addl %eax,%ebp | |
186 leal 1(%ecx),%ecx | |
187 adcl $0,%edx | |
188 movl (%esi,%ecx,4),%eax | |
189 cmpl %ebx,%ecx | |
190 movl %ebp,28(%esp,%ecx,4) | |
191 jl .L008mull | |
/* Last column: tp[num-1], tp[num] = a[num-1]*b[0] + carry, tp[num+1] = 0.
   Interleaved with that: edi = n0 * tp[0] (the multiplier for n), esi = n. */
192 movl %edx,%ebp | |
193 mull %edi | |
194 movl 20(%esp),%edi | |
195 addl %ebp,%eax | |
196 movl 16(%esp),%esi | |
197 adcl $0,%edx | |
198 imull 32(%esp),%edi | |
199 movl %eax,32(%esp,%ebx,4) | |
200 xorl %ecx,%ecx | |
201 movl %edx,36(%esp,%ebx,4) | |
202 movl %ecx,40(%esp,%ebx,4) | |
/* Column 0 of the n*multiplier pass: only the carry out of n[0]*edi + tp[0]
   is kept in edx (eax is immediately reloaded with n[1]). */
203 movl (%esi),%eax | |
204 mull %edi | |
205 addl 32(%esp),%eax | |
206 movl 4(%esi),%eax | |
207 adcl $0,%edx | |
208 incl %ecx | |
209 jmp .L0092ndmadd | |
210 .align 16 | |
/* Multiply-accumulate pass for b[i], i >= 1 (edi = b[i], esi = a):
   tp[j] = low(tp[j] + a[j]*b[i] + carry) for j = 0 .. num-2. */
211 .L0101stmadd: | |
212 movl %edx,%ebp | |
213 mull %edi | |
214 addl 32(%esp,%ecx,4),%ebp | |
215 leal 1(%ecx),%ecx | |
216 adcl $0,%edx | |
217 addl %eax,%ebp | |
218 movl (%esi,%ecx,4),%eax | |
219 adcl $0,%edx | |
220 cmpl %ebx,%ecx | |
221 movl %ebp,28(%esp,%ecx,4) | |
222 jl .L0101stmadd | |
/* Last column of the pass: updates tp[num-1], tp[num] and stores the final
   carry in tp[num+1].  Interleaved: edi = n0 * tp[0], esi = n. */
223 movl %edx,%ebp | |
224 mull %edi | |
225 addl 32(%esp,%ebx,4),%eax | |
226 movl 20(%esp),%edi | |
227 adcl $0,%edx | |
228 movl 16(%esp),%esi | |
229 addl %eax,%ebp | |
230 adcl $0,%edx | |
231 imull 32(%esp),%edi | |
232 xorl %ecx,%ecx | |
233 addl 36(%esp,%ebx,4),%edx | |
234 movl %ebp,32(%esp,%ebx,4) | |
235 adcl $0,%ecx | |
236 movl (%esi),%eax | |
237 movl %edx,36(%esp,%ebx,4) | |
238 movl %ecx,40(%esp,%ebx,4) | |
/* Column 0 of the n*multiplier pass: keep only the carry, preload n[1]. */
239 mull %edi | |
240 addl 32(%esp),%eax | |
241 movl 4(%esi),%eax | |
242 adcl $0,%edx | |
243 movl $1,%ecx | |
244 .align 16 | |
/* n*multiplier pass (edi = multiplier, esi = n): for j = 1 .. num-2 the
   value low(tp[j] + n[j]*edi + carry) is stored one slot lower, at tp[j-1]
   (ecx has already been incremented when the store executes). */
245 .L0092ndmadd: | |
246 movl %edx,%ebp | |
247 mull %edi | |
248 addl 32(%esp,%ecx,4),%ebp | |
249 leal 1(%ecx),%ecx | |
250 adcl $0,%edx | |
251 addl %eax,%ebp | |
252 movl (%esi,%ecx,4),%eax | |
253 adcl $0,%edx | |
254 cmpl %ebx,%ecx | |
255 movl %ebp,24(%esp,%ecx,4) | |
256 jl .L0092ndmadd | |
/* Last column goes to tp[num-2]; the carry plus old tp[num], tp[num+1]
   become the new tp[num-1], tp[num]. */
257 movl %edx,%ebp | |
258 mull %edi | |
259 addl 32(%esp,%ebx,4),%ebp | |
260 adcl $0,%edx | |
261 addl %eax,%ebp | |
262 adcl $0,%edx | |
263 movl %ebp,28(%esp,%ebx,4) | |
264 xorl %eax,%eax | |
265 movl 12(%esp),%ecx | |
266 addl 36(%esp,%ebx,4),%edx | |
267 adcl 40(%esp,%ebx,4),%eax | |
/* Advance the b pointer; when it reaches &b[num] all passes are done. */
268 leal 4(%ecx),%ecx | |
269 movl %edx,32(%esp,%ebx,4) | |
270 cmpl 28(%esp),%ecx | |
271 movl %eax,36(%esp,%ebx,4) | |
272 je .L006common_tail | |
/* Set up the next multiply-accumulate pass: edi = next b word, esi = a. */
273 movl (%ecx),%edi | |
274 movl 8(%esp),%esi | |
275 movl %ecx,12(%esp) | |
276 xorl %ecx,%ecx | |
277 xorl %edx,%edx | |
278 movl (%esi),%eax | |
279 jmp .L0101stmadd | |
280 .align 16 | |
/*
 * ---- Integer squaring path (a == b, num even) ---------------------------
 * 0(%esp) = num-1, 12(%esp) = outer index i = 0, edi = a[0].
 * tp[0] = low(a[0]^2).  The high word of the square is halved (edx) with
 * its dropped low bit kept in ebx, because the loop below stores doubled
 * values and re-inserts that bit.
 */
281 .L007bn_sqr_mont: | |
282 movl %ebx,(%esp) | |
283 movl %ecx,12(%esp) | |
284 movl %edi,%eax | |
285 mull %edi | |
286 movl %eax,32(%esp) | |
287 movl %edx,%ebx | |
288 shrl $1,%edx | |
289 andl $1,%ebx | |
290 incl %ecx | |
291 .align 16 | |
/* For j = 1 .. num-2: tp[j] = 2*low(a[j]*a[0] + carry) + ebx, where ebx is
   the bit shifted out of the previous column. */
292 .L011sqr: | |
293 movl (%esi,%ecx,4),%eax | |
294 movl %edx,%ebp | |
295 mull %edi | |
296 addl %ebp,%eax | |
297 leal 1(%ecx),%ecx | |
298 adcl $0,%edx | |
299 leal (%ebx,%eax,2),%ebp | |
300 shrl $31,%eax | |
301 cmpl (%esp),%ecx | |
302 movl %eax,%ebx | |
303 movl %ebp,28(%esp,%ecx,4) | |
304 jl .L011sqr | |
/* Last column (j = num-1): stores tp[num-1], tp[num], tp[num+1].
   Interleaved: edi = n0 * tp[0], esi = n, then column 0 of the
   n*multiplier pass (carry only).  ebx = num-1, ecx = 1 for the loop. */
305 movl (%esi,%ecx,4),%eax | |
306 movl %edx,%ebp | |
307 mull %edi | |
308 addl %ebp,%eax | |
309 movl 20(%esp),%edi | |
310 adcl $0,%edx | |
311 movl 16(%esp),%esi | |
312 leal (%ebx,%eax,2),%ebp | |
313 imull 32(%esp),%edi | |
314 shrl $31,%eax | |
315 movl %ebp,32(%esp,%ecx,4) | |
316 leal (%eax,%edx,2),%ebp | |
317 movl (%esi),%eax | |
318 shrl $31,%edx | |
319 movl %ebp,36(%esp,%ecx,4) | |
320 movl %edx,40(%esp,%ecx,4) | |
321 mull %edi | |
322 addl 32(%esp),%eax | |
323 movl %ecx,%ebx | |
324 adcl $0,%edx | |
325 movl 4(%esi),%eax | |
326 movl $1,%ecx | |
327 .align 16 | |
/* n*multiplier pass for the squaring path, two columns per iteration
   (ecx advances by 2 from 1 up to num-1).  Each result is stored one slot
   lower than the tp[] word it was computed from. */
328 .L0123rdmadd: | |
329 movl %edx,%ebp | |
330 mull %edi | |
331 addl 32(%esp,%ecx,4),%ebp | |
332 adcl $0,%edx | |
333 addl %eax,%ebp | |
334 movl 4(%esi,%ecx,4),%eax | |
335 adcl $0,%edx | |
336 movl %ebp,28(%esp,%ecx,4) | |
337 movl %edx,%ebp | |
338 mull %edi | |
339 addl 36(%esp,%ecx,4),%ebp | |
340 leal 2(%ecx),%ecx | |
341 adcl $0,%edx | |
342 addl %eax,%ebp | |
343 movl (%esi,%ecx,4),%eax | |
344 adcl $0,%edx | |
345 cmpl %ebx,%ecx | |
346 movl %ebp,24(%esp,%ecx,4) | |
347 jl .L0123rdmadd | |
/* Last column goes to tp[num-2]; carry plus old tp[num], tp[num+1] become
   the new tp[num-1], tp[num].  ecx = outer index i, esi = a. */
348 movl %edx,%ebp | |
349 mull %edi | |
350 addl 32(%esp,%ebx,4),%ebp | |
351 adcl $0,%edx | |
352 addl %eax,%ebp | |
353 adcl $0,%edx | |
354 movl %ebp,28(%esp,%ebx,4) | |
355 movl 12(%esp),%ecx | |
356 xorl %eax,%eax | |
357 movl 8(%esp),%esi | |
358 addl 36(%esp,%ebx,4),%edx | |
359 adcl 40(%esp,%ebx,4),%eax | |
360 movl %edx,32(%esp,%ebx,4) | |
/* Done once i == num-1. */
361 cmpl %ebx,%ecx | |
362 movl %eax,36(%esp,%ebx,4) | |
363 je .L006common_tail | |
/* Next i: edi = a[i], tp[i] += low(a[i]^2), edx = high word (+ carry). */
364 movl 4(%esi,%ecx,4),%edi | |
365 leal 1(%ecx),%ecx | |
366 movl %edi,%eax | |
367 movl %ecx,12(%esp) | |
368 mull %edi | |
369 addl 32(%esp,%ecx,4),%eax | |
370 adcl $0,%edx | |
371 movl %eax,32(%esp,%ecx,4) | |
372 xorl %ebp,%ebp | |
/* If i == num-1 there are no cross products left; leal keeps the flags
   from cmpl, so je still reflects that comparison. */
373 cmpl %ebx,%ecx | |
374 leal 1(%ecx),%ecx | |
375 je .L013sqrlast | |
/* Halve the pending high word and keep its low bit in ebx, as at entry. */
376 movl %edx,%ebx | |
377 shrl $1,%edx | |
378 andl $1,%ebx | |
379 .align 16 | |
/* For j = i+1 .. num-1: tp[j] += 2*low(a[j]*a[i] + carry) + ebx; the new
   ebx collects the bit shifted out plus the carries of both additions.
   leal builds 2*eax without disturbing CF from the preceding addl. */
380 .L014sqradd: | |
381 movl (%esi,%ecx,4),%eax | |
382 movl %edx,%ebp | |
383 mull %edi | |
384 addl %ebp,%eax | |
385 leal (%eax,%eax,1),%ebp | |
386 adcl $0,%edx | |
387 shrl $31,%eax | |
388 addl 32(%esp,%ecx,4),%ebp | |
389 leal 1(%ecx),%ecx | |
390 adcl $0,%eax | |
391 addl %ebx,%ebp | |
392 adcl $0,%eax | |
393 cmpl (%esp),%ecx | |
394 movl %ebp,28(%esp,%ecx,4) | |
395 movl %eax,%ebx | |
396 jle .L014sqradd | |
/* Double the final high word: ebp:edx = 2*edx + ebx. */
397 movl %edx,%ebp | |
398 addl %edx,%edx | |
399 shrl $31,%ebp | |
400 addl %ebx,%edx | |
401 adcl $0,%ebp | |
/* Add ebp:edx into tp[num], tp[num+1] (ecx == num here), compute the new
   multiplier edi = n0 * tp[0], do column 0 of the n*multiplier pass (carry
   only) and re-enter the shared loop with ebx = num-1, ecx = 1. */
402 .L013sqrlast: | |
403 movl 20(%esp),%edi | |
404 movl 16(%esp),%esi | |
405 imull 32(%esp),%edi | |
406 addl 32(%esp,%ecx,4),%edx | |
407 movl (%esi),%eax | |
408 adcl $0,%ebp | |
409 movl %edx,32(%esp,%ecx,4) | |
410 movl %ebp,36(%esp,%ecx,4) | |
411 mull %edi | |
412 addl 32(%esp),%eax | |
413 leal -1(%ecx),%ebx | |
414 adcl $0,%edx | |
415 movl $1,%ecx | |
416 movl 4(%esi),%eax | |
417 jmp .L0123rdmadd | |
418 .align 16 | |
/*
 * ---- Common tail --------------------------------------------------------
 * ebp = n, edi = r, esi = &tp[0], ecx = ebx (num-1 on every path into
 * here), edx = index 0.  xorl also clears CF for the first sbbl.
 */
419 .L006common_tail: | |
420 movl 16(%esp),%ebp | |
421 movl 4(%esp),%edi | |
422 leal 32(%esp),%esi | |
423 movl (%esi),%eax | |
424 movl %ebx,%ecx | |
425 xorl %edx,%edx | |
426 .align 16 | |
/* r[k] = tp[k] - n[k] - borrow for k = 0 .. num-1.  decl does not modify
   CF and movl/leal modify no flags, so the borrow survives to the next
   sbbl while jge tests the sign of the counter. */
427 .L015sub: | |
428 sbbl (%ebp,%edx,4),%eax | |
429 movl %eax,(%edi,%edx,4) | |
430 decl %ecx | |
431 movl 4(%esi,%edx,4),%eax | |
432 leal 1(%edx),%edx | |
433 jge .L015sub | |
/* eax = tp[num] - final borrow.  It is used as a select mask below
   (expected to be 0 or all ones -- TODO confirm the tp[num] range). */
434 sbbl $0,%eax | |
435 .align 16 | |
/* For k = num-1 down to 0, without branching on the data:
   r[k] = ((tp[k] ^ r[k]) & eax) ^ r[k], i.e. tp[k] where mask bits are set
   and the subtracted value where they are clear.  Each tp[k] is then
   overwritten with ecx, which the loop above left at -1. */
436 .L016copy: | |
437 movl (%esi,%ebx,4),%edx | |
438 movl (%edi,%ebx,4),%ebp | |
439 xorl %ebp,%edx | |
440 andl %eax,%edx | |
441 xorl %ebp,%edx | |
442 movl %ecx,(%esi,%ebx,4) | |
443 movl %edx,(%edi,%ebx,4) | |
444 decl %ebx | |
445 jge .L016copy | |
/* Drop the private frame and report success. */
446 movl 24(%esp),%esp | |
447 movl $1,%eax | |
/* Shared exit: eax is 0 when reached from the num < 4 check, 1 otherwise. */
448 .L000just_leave: | |
449 popl %edi | |
450 popl %esi | |
451 popl %ebx | |
452 popl %ebp | |
453 ret | |
454 .size bn_mul_mont,.-.L_bn_mul_mont_begin | |
/* NUL-terminated ASCII ident string:
   "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>" */
455 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 | |
456 .byte 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56 | |
457 .byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 | |
458 .byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 | |
459 .byte 111,114,103,62,0 | |
460 #endif | |
OLD | NEW |