OLD | NEW |
| (Empty) |
# Preprocessor guard: the rest of this file, down to the closing #endif, is
# only assembled when __i386__ is defined.
1 #if defined(__i386__) | |
2 .file "src/crypto/bn/asm/x86-mont.S" | |
3 .text | |
# Make _bn_mul_mont global for linking, but private_extern so the symbol is
# not exported beyond the final linked image.
4 .globl _bn_mul_mont | |
5 .private_extern _bn_mul_mont | |
# NOTE(review): Mach-O assemblers read the .align operand as a power of two,
# so this is presumably 16-byte alignment of the entry point - confirm.
6 .align 4 | |
# _bn_mul_mont: Montgomery multiplication on arrays of 32-bit words
# (AT&T syntax, arguments on the stack, status returned in %eax).
#
# Arguments as this code uses them. The parameter names are not visible in
# this file; presumably rp, ap, bp, np, n0, num as in the OpenSSL C
# prototype - confirm:
#   arg1  destination word array (written in L006common_tail)
#   arg2  first multiplicand word array
#   arg3  second multiplicand word array
#   arg4  modulus word array
#   arg5  pointer; only the first 32-bit word behind it is read
#   arg6  word count, compared as a signed value against 4
# Returns 0 in %eax and computes nothing when arg6 < 4; returns 1 otherwise.
#
# Scratch frame built below (offsets from the new %esp):
#   esp+0   unused here; the squaring path keeps num-1 in it
#   esp+4   arg1
#   esp+8   arg2
#   esp+12  arg3 (the integer paths reuse it as a running pointer or index)
#   esp+16  arg4
#   esp+20  the word loaded through arg5
#   esp+24  %esp value to restore before returning
#   esp+28  end-of-array pointer on the integer multiply path
#   esp+32  temporary vector of num+2 words
7 _bn_mul_mont: | |
8 L_bn_mul_mont_begin: | |
# Save ebp, ebx, esi, edi (popped again at L000just_leave). After these
# pushes the return address is at 16(%esp) and arg1 at 20(%esp).
9 pushl %ebp | |
10 pushl %ebx | |
11 pushl %esi | |
12 pushl %edi | |
# Preload the failure return value, then leave early when arg6 < 4.
13 xorl %eax,%eax | |
14 movl 40(%esp),%edi | |
15 cmpl $4,%edi | |
16 jl L000just_leave | |
# esi = address of the arg1 slot, edx = address of the arg2 slot,
# ebp = current %esp (stored at 24(%esp) of the new frame further down).
17 leal 20(%esp),%esi | |
18 leal 24(%esp),%edx | |
19 movl %esp,%ebp | |
# Reserve 32 + 4*(num+2) bytes; edi is left holding num+2.
# NOTE(review): the frame is carved out in one step with no per-page
# touching, so a very large word count moves %esp far in one go - confirm
# that callers bound it.
20 addl $2,%edi | |
21 negl %edi | |
22 leal -32(%esp,%edi,4),%esp | |
23 negl %edi | |
# Lower %esp until it equals the arg2 slot address modulo 2048.
24 movl %esp,%eax | |
25 subl %edx,%eax | |
26 andl $2047,%eax | |
27 subl %eax,%esp | |
# Drop a further 2048 bytes when bit 11 of %esp equals bit 11 of that slot
# address, so the two addresses end up differing in bit 11.
# NOTE(review): presumably a cache-aliasing countermeasure; the rationale is
# not stated in this file.
28 xorl %esp,%edx | |
29 andl $2048,%edx | |
30 xorl $2048,%edx | |
31 subl %edx,%esp | |
# Round the frame base down to a 64-byte boundary.
32 andl $-64,%esp | |
# Copy the first five arguments out of the caller frame. arg5 is
# dereferenced once, so 20(%esp) holds the word it points at.
33 movl (%esi),%eax | |
34 movl 4(%esi),%ebx | |
35 movl 8(%esi),%ecx | |
36 movl 12(%esi),%edx | |
37 movl 16(%esi),%esi | |
38 movl (%esi),%esi | |
39 movl %eax,4(%esp) | |
40 movl %ebx,8(%esp) | |
41 movl %ecx,12(%esp) | |
42 movl %edx,16(%esp) | |
43 movl %esi,20(%esp) | |
# ebx = (num+2)-3 = num-1, the index of the last word.
44 leal -3(%edi),%ebx | |
45 movl %ebp,24(%esp) | |
# Position-independent lookup: call/pop yields the address of
# L001PIC_me_up, which is used to reach the non-lazy pointer slot for
# _OPENSSL_ia32cap_P.
46 call L001PIC_me_up | |
47 L001PIC_me_up: | |
48 popl %eax | |
49 movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L001PIC_me_up(%eax),%eax | |
# Bit 26 of the first capability word selects the path: clear means jump to
# the integer-only code, set means fall through into the SSE2 code.
50 btl $26,(%eax) | |
51 jnc L002non_sse2 | |
# SSE2 path, entered by fall-through when bit 26 of the capability word is
# set.
# Notation used in these comments: a, b and n are the word arrays whose
# addresses sit at 8(%esp), 12(%esp) and 16(%esp); n0 is the word at
# 20(%esp); t[] is the temporary vector starting at 32(%esp); ebx enters
# holding the last word index (num-1).
# Register roles: mm7 = mask of the low 32 bits, mm4 = b[i], mm5 = m (the
# per-row multiplier applied to n), mm2 = carry of the a*b[i] chain,
# mm3 = carry of the n*m chain, mm6 = t word being folded in,
# ecx = inner index j, edx = outer index i.
52 movl $-1,%eax | |
53 movd %eax,%mm7 | |
54 movl 8(%esp),%esi | |
55 movl 12(%esp),%edi | |
56 movl 16(%esp),%ebp | |
57 xorl %edx,%edx | |
58 xorl %ecx,%ecx | |
# Row i = 0, word 0: mm5 = a[0]*b[0], then mm5 *= n0 so that its low 32 bits
# are m. n[0]*m plus the low product word is formed only for its carry; the
# low word itself is never stored.
59 movd (%edi),%mm4 | |
60 movd (%esi),%mm5 | |
61 movd (%ebp),%mm3 | |
62 pmuludq %mm4,%mm5 | |
63 movq %mm5,%mm2 | |
64 movq %mm5,%mm0 | |
65 pand %mm7,%mm0 | |
66 pmuludq 20(%esp),%mm5 | |
67 pmuludq %mm5,%mm3 | |
68 paddq %mm0,%mm3 | |
# Preload n[1] and a[1], reduce both chains to their carries, start at j = 1.
69 movd 4(%ebp),%mm1 | |
70 movd 4(%esi),%mm0 | |
71 psrlq $32,%mm2 | |
72 psrlq $32,%mm3 | |
73 incl %ecx | |
74 .align 4,0x90 | |
# First-row loop, j = 1 .. num-2: add a[j]*b[0] and n[j]*m into the two
# chains, store the low result word one slot down (t[j-1]) and preload the
# operands for j+1.
75 L0031st: | |
76 pmuludq %mm4,%mm0 | |
77 pmuludq %mm5,%mm1 | |
78 paddq %mm0,%mm2 | |
79 paddq %mm1,%mm3 | |
80 movq %mm2,%mm0 | |
81 pand %mm7,%mm0 | |
82 movd 4(%ebp,%ecx,4),%mm1 | |
83 paddq %mm0,%mm3 | |
84 movd 4(%esi,%ecx,4),%mm0 | |
85 psrlq $32,%mm2 | |
86 movd %mm3,28(%esp,%ecx,4) | |
87 psrlq $32,%mm3 | |
88 leal 1(%ecx),%ecx | |
89 cmpl %ebx,%ecx | |
90 jl L0031st | |
# Last word of row 0 (j = num-1): store t[num-2], then write the sum of both
# carries as one 64-bit value covering t[num-1] and t[num].
91 pmuludq %mm4,%mm0 | |
92 pmuludq %mm5,%mm1 | |
93 paddq %mm0,%mm2 | |
94 paddq %mm1,%mm3 | |
95 movq %mm2,%mm0 | |
96 pand %mm7,%mm0 | |
97 paddq %mm0,%mm3 | |
98 movd %mm3,28(%esp,%ecx,4) | |
99 psrlq $32,%mm2 | |
100 psrlq $32,%mm3 | |
101 paddq %mm2,%mm3 | |
102 movq %mm3,32(%esp,%ebx,4) | |
103 incl %edx | |
# Outer loop over i = 1 .. num-1.
104 L004outer: | |
105 xorl %ecx,%ecx | |
# Word 0 of row i: a[0]*b[i] + t[0]; m comes from its low word times n0.
# As in row 0 only the carries are kept, and t[1] is added to the a*b chain.
106 movd (%edi,%edx,4),%mm4 | |
107 movd (%esi),%mm5 | |
108 movd 32(%esp),%mm6 | |
109 movd (%ebp),%mm3 | |
110 pmuludq %mm4,%mm5 | |
111 paddq %mm6,%mm5 | |
112 movq %mm5,%mm0 | |
113 movq %mm5,%mm2 | |
114 pand %mm7,%mm0 | |
115 pmuludq 20(%esp),%mm5 | |
116 pmuludq %mm5,%mm3 | |
117 paddq %mm0,%mm3 | |
118 movd 36(%esp),%mm6 | |
119 movd 4(%ebp),%mm1 | |
120 movd 4(%esi),%mm0 | |
121 psrlq $32,%mm2 | |
122 psrlq $32,%mm3 | |
123 paddq %mm6,%mm2 | |
124 incl %ecx | |
# ebx turns into a down-counter here: num-2 iterations of the inner loop.
125 decl %ebx | |
# Inner loop, j = 1 .. num-2: same as the first-row loop but also folds in
# t[j+1]. leal leaves the flags alone, so jnz tests the result of decl.
126 L005inner: | |
127 pmuludq %mm4,%mm0 | |
128 pmuludq %mm5,%mm1 | |
129 paddq %mm0,%mm2 | |
130 paddq %mm1,%mm3 | |
131 movq %mm2,%mm0 | |
132 movd 36(%esp,%ecx,4),%mm6 | |
133 pand %mm7,%mm0 | |
134 movd 4(%ebp,%ecx,4),%mm1 | |
135 paddq %mm0,%mm3 | |
136 movd 4(%esi,%ecx,4),%mm0 | |
137 psrlq $32,%mm2 | |
138 movd %mm3,28(%esp,%ecx,4) | |
139 psrlq $32,%mm3 | |
140 paddq %mm6,%mm2 | |
141 decl %ebx | |
142 leal 1(%ecx),%ecx | |
143 jnz L005inner | |
# ecx is num-1 at this point; restore ebx to the last word index.
144 movl %ecx,%ebx | |
# Last word of row i: store t[num-2], then t[num-1] and t[num] receive both
# carries plus the previous t[num] as one 64-bit value.
145 pmuludq %mm4,%mm0 | |
146 pmuludq %mm5,%mm1 | |
147 paddq %mm0,%mm2 | |
148 paddq %mm1,%mm3 | |
149 movq %mm2,%mm0 | |
150 pand %mm7,%mm0 | |
151 paddq %mm0,%mm3 | |
152 movd %mm3,28(%esp,%ecx,4) | |
153 psrlq $32,%mm2 | |
154 psrlq $32,%mm3 | |
155 movd 36(%esp,%ebx,4),%mm6 | |
156 paddq %mm2,%mm3 | |
157 paddq %mm6,%mm3 | |
158 movq %mm3,32(%esp,%ebx,4) | |
159 leal 1(%edx),%edx | |
160 cmpl %ebx,%edx | |
161 jle L004outer | |
# Clear the MMX state, then continue with the shared integer-only tail.
162 emms | |
163 jmp L006common_tail | |
# Integer-only path, taken when the SSE2 capability bit is clear.
# Notation used in these comments: a, b and n are the word arrays whose
# addresses sit at 8(%esp), 12(%esp) and 16(%esp); n0 is the word at
# 20(%esp); t[] is the temporary vector starting at 32(%esp); ebx holds the
# last word index (num-1); m is the per-row multiplier applied to n.
164 .align 4,0x90 | |
165 L002non_sse2: | |
# Dispatch: ebp = (num & 1) | (a - b), which is zero only when the two
# multiplicand pointers are equal and num is even; that case goes to the
# squaring code. The movl in between does not change the flags set by orl.
# eax = address one past the last word of b; edi = b[0].
166 movl 8(%esp),%esi | |
167 leal 1(%ebx),%ebp | |
168 movl 12(%esp),%edi | |
169 xorl %ecx,%ecx | |
170 movl %esi,%edx | |
171 andl $1,%ebp | |
172 subl %edi,%edx | |
173 leal 4(%edi,%ebx,4),%eax | |
174 orl %edx,%ebp | |
175 movl (%edi),%edi | |
176 jz L007bn_sqr_mont | |
# General multiply: keep the end-of-b pointer at 28(%esp); 12(%esp) acts as
# the running pointer into b.
177 movl %eax,28(%esp) | |
178 movl (%esi),%eax | |
179 xorl %edx,%edx | |
180 .align 4,0x90 | |
# Row 0, j = 0 .. num-2: t[j] = low(a[j]*b[0] + carry), next carry in edx.
# leal sits between addl and adcl because it leaves CF untouched.
181 L008mull: | |
182 movl %edx,%ebp | |
183 mull %edi | |
184 addl %eax,%ebp | |
185 leal 1(%ecx),%ecx | |
186 adcl $0,%edx | |
187 movl (%esi,%ecx,4),%eax | |
188 cmpl %ebx,%ecx | |
189 movl %ebp,28(%esp,%ecx,4) | |
190 jl L008mull | |
# Last word of row 0: t[num-1] and t[num] take the final product plus carry
# and t[num+1] is cleared. Interleaved with that, edi becomes m = n0 * t[0]
# (low 32 bits) and esi is switched to n. n[0]*m + t[0] is computed only for
# its carry; eax is then reloaded with n[1] and the reduction starts at j = 1.
191 movl %edx,%ebp | |
192 mull %edi | |
193 movl 20(%esp),%edi | |
194 addl %ebp,%eax | |
195 movl 16(%esp),%esi | |
196 adcl $0,%edx | |
197 imull 32(%esp),%edi | |
198 movl %eax,32(%esp,%ebx,4) | |
199 xorl %ecx,%ecx | |
200 movl %edx,36(%esp,%ebx,4) | |
201 movl %ecx,40(%esp,%ebx,4) | |
202 movl (%esi),%eax | |
203 mull %edi | |
204 addl 32(%esp),%eax | |
205 movl 4(%esi),%eax | |
206 adcl $0,%edx | |
207 incl %ecx | |
208 jmp L0092ndmadd | |
209 .align 4,0x90 | |
# Rows i >= 1, j = 0 .. num-2: t[j] = low(a[j]*b[i] + t[j] + carry).
210 L0101stmadd: | |
211 movl %edx,%ebp | |
212 mull %edi | |
213 addl 32(%esp,%ecx,4),%ebp | |
214 leal 1(%ecx),%ecx | |
215 adcl $0,%edx | |
216 addl %eax,%ebp | |
217 movl (%esi,%ecx,4),%eax | |
218 adcl $0,%edx | |
219 cmpl %ebx,%ecx | |
220 movl %ebp,28(%esp,%ecx,4) | |
221 jl L0101stmadd | |
# Last word of the row: fold a[num-1]*b[i] into t[num-1], propagate the
# carry into t[num] and t[num+1] (xorl zeroes ecx so adcl can collect the
# carry), and set up the reduction exactly as after row 0: edi = m,
# esi = n, carry of n[0]*m + t[0] in edx, eax = n[1], ecx = 1.
222 movl %edx,%ebp | |
223 mull %edi | |
224 addl 32(%esp,%ebx,4),%eax | |
225 movl 20(%esp),%edi | |
226 adcl $0,%edx | |
227 movl 16(%esp),%esi | |
228 addl %eax,%ebp | |
229 adcl $0,%edx | |
230 imull 32(%esp),%edi | |
231 xorl %ecx,%ecx | |
232 addl 36(%esp,%ebx,4),%edx | |
233 movl %ebp,32(%esp,%ebx,4) | |
234 adcl $0,%ecx | |
235 movl (%esi),%eax | |
236 movl %edx,36(%esp,%ebx,4) | |
237 movl %ecx,40(%esp,%ebx,4) | |
238 mull %edi | |
239 addl 32(%esp),%eax | |
240 movl 4(%esi),%eax | |
241 adcl $0,%edx | |
242 movl $1,%ecx | |
243 .align 4,0x90 | |
# Reduction pass, j = 1 .. num-2: t[j-1] = low(n[j]*m + t[j] + carry), so
# every word moves one slot down.
244 L0092ndmadd: | |
245 movl %edx,%ebp | |
246 mull %edi | |
247 addl 32(%esp,%ecx,4),%ebp | |
248 leal 1(%ecx),%ecx | |
249 adcl $0,%edx | |
250 addl %eax,%ebp | |
251 movl (%esi,%ecx,4),%eax | |
252 adcl $0,%edx | |
253 cmpl %ebx,%ecx | |
254 movl %ebp,24(%esp,%ecx,4) | |
255 jl L0092ndmadd | |
# Last reduction word: t[num-2] = low(n[num-1]*m + t[num-1] + carry), then
# the old t[num] and t[num+1] plus carries move down to t[num-1] and t[num].
# In parallel the b pointer from 12(%esp) is advanced by one word and
# compared with the end pointer at 28(%esp); when equal all rows are done.
256 movl %edx,%ebp | |
257 mull %edi | |
258 addl 32(%esp,%ebx,4),%ebp | |
259 adcl $0,%edx | |
260 addl %eax,%ebp | |
261 adcl $0,%edx | |
262 movl %ebp,28(%esp,%ebx,4) | |
263 xorl %eax,%eax | |
264 movl 12(%esp),%ecx | |
265 addl 36(%esp,%ebx,4),%edx | |
266 adcl 40(%esp,%ebx,4),%eax | |
267 leal 4(%ecx),%ecx | |
268 movl %edx,32(%esp,%ebx,4) | |
269 cmpl 28(%esp),%ecx | |
270 movl %eax,36(%esp,%ebx,4) | |
271 je L006common_tail | |
# Next row: edi = next word of b, save the advanced pointer, esi = a again,
# j = 0 with zero carry and eax = a[0].
272 movl (%ecx),%edi | |
273 movl 8(%esp),%esi | |
274 movl %ecx,12(%esp) | |
275 xorl %ecx,%ecx | |
276 xorl %edx,%edx | |
277 movl (%esi),%eax | |
278 jmp L0101stmadd | |
# Squaring path, reached from the integer-only dispatcher when the two
# multiplicand pointers are equal and num is even.
# Entry state: esi = a, edi = a[0], ecx = 0, ebx = num-1.
# Notation used in these comments: a is the operand array, n the array at
# 16(%esp), n0 the word at 20(%esp), t[] the temporary vector starting at
# 32(%esp), m the per-row multiplier applied to n.
# Frame use on this path: 0(%esp) = num-1, 12(%esp) = current row index i.
279 .align 4,0x90 | |
280 L007bn_sqr_mont: | |
281 movl %ebx,(%esp) | |
282 movl %ecx,12(%esp) | |
# t[0] = low(a[0]^2). The high word is halved into edx and its dropped low
# bit is kept in ebx, because the loop below doubles what it accumulates.
283 movl %edi,%eax | |
284 mull %edi | |
285 movl %eax,32(%esp) | |
286 movl %edx,%ebx | |
287 shrl $1,%edx | |
288 andl $1,%ebx | |
289 incl %ecx | |
290 .align 4,0x90 | |
# Row 0, j = 1 .. num-2: t[j] = ebx + 2*low(a[j]*a[0] + carry). The bit
# shifted out by the doubling moves into ebx for the next word.
291 L011sqr: | |
292 movl (%esi,%ecx,4),%eax | |
293 movl %edx,%ebp | |
294 mull %edi | |
295 addl %ebp,%eax | |
296 leal 1(%ecx),%ecx | |
297 adcl $0,%edx | |
298 leal (%ebx,%eax,2),%ebp | |
299 shrl $31,%eax | |
300 cmpl (%esp),%ecx | |
301 movl %eax,%ebx | |
302 movl %ebp,28(%esp,%ecx,4) | |
303 jl L011sqr | |
# Last word of row 0 (ecx = num-1): the doubled low word goes to t[num-1],
# the doubled high word plus the shifted-out bit to t[num], and the top bit
# of the high word to t[num+1]. Interleaved: edi = m = n0 * t[0], esi = n,
# n[0]*m + t[0] is computed only for its carry, ebx = num-1, eax = n[1],
# ecx = 1.
304 movl (%esi,%ecx,4),%eax | |
305 movl %edx,%ebp | |
306 mull %edi | |
307 addl %ebp,%eax | |
308 movl 20(%esp),%edi | |
309 adcl $0,%edx | |
310 movl 16(%esp),%esi | |
311 leal (%ebx,%eax,2),%ebp | |
312 imull 32(%esp),%edi | |
313 shrl $31,%eax | |
314 movl %ebp,32(%esp,%ecx,4) | |
315 leal (%eax,%edx,2),%ebp | |
316 movl (%esi),%eax | |
317 shrl $31,%edx | |
318 movl %ebp,36(%esp,%ecx,4) | |
319 movl %edx,40(%esp,%ecx,4) | |
320 mull %edi | |
321 addl 32(%esp),%eax | |
322 movl %ecx,%ebx | |
323 adcl $0,%edx | |
324 movl 4(%esi),%eax | |
325 movl $1,%ecx | |
326 .align 4,0x90 | |
# Reduction pass, two words per iteration (num is even on this path):
# t[j-1] = low(n[j]*m + t[j] + carry) for j = 1 .. num-2.
327 L0123rdmadd: | |
328 movl %edx,%ebp | |
329 mull %edi | |
330 addl 32(%esp,%ecx,4),%ebp | |
331 adcl $0,%edx | |
332 addl %eax,%ebp | |
333 movl 4(%esi,%ecx,4),%eax | |
334 adcl $0,%edx | |
335 movl %ebp,28(%esp,%ecx,4) | |
336 movl %edx,%ebp | |
337 mull %edi | |
338 addl 36(%esp,%ecx,4),%ebp | |
339 leal 2(%ecx),%ecx | |
340 adcl $0,%edx | |
341 addl %eax,%ebp | |
342 movl (%esi,%ecx,4),%eax | |
343 adcl $0,%edx | |
344 cmpl %ebx,%ecx | |
345 movl %ebp,24(%esp,%ecx,4) | |
346 jl L0123rdmadd | |
# Last reduction word: t[num-2] = low(n[num-1]*m + t[num-1] + carry), then
# the old t[num] and t[num+1] plus carries move down to t[num-1] and t[num].
# ecx = row index i from 12(%esp), esi = a; finished when i equals num-1.
347 movl %edx,%ebp | |
348 mull %edi | |
349 addl 32(%esp,%ebx,4),%ebp | |
350 adcl $0,%edx | |
351 addl %eax,%ebp | |
352 adcl $0,%edx | |
353 movl %ebp,28(%esp,%ebx,4) | |
354 movl 12(%esp),%ecx | |
355 xorl %eax,%eax | |
356 movl 8(%esp),%esi | |
357 addl 36(%esp,%ebx,4),%edx | |
358 adcl 40(%esp,%ebx,4),%eax | |
359 movl %edx,32(%esp,%ebx,4) | |
360 cmpl %ebx,%ecx | |
361 movl %eax,36(%esp,%ebx,4) | |
362 je L006common_tail | |
# Next row: i += 1, edi = a[i], and t[i] += low(a[i]^2) with the high word
# plus carry left in edx. The leal after cmpl keeps the flags, so je still
# tests whether i equals num-1; in that case no cross products remain.
363 movl 4(%esi,%ecx,4),%edi | |
364 leal 1(%ecx),%ecx | |
365 movl %edi,%eax | |
366 movl %ecx,12(%esp) | |
367 mull %edi | |
368 addl 32(%esp,%ecx,4),%eax | |
369 adcl $0,%edx | |
370 movl %eax,32(%esp,%ecx,4) | |
371 xorl %ebp,%ebp | |
372 cmpl %ebx,%ecx | |
373 leal 1(%ecx),%ecx | |
374 je L013sqrlast | |
# Halve the pending high word as in row 0: edx = high >> 1, ebx = dropped bit.
375 movl %edx,%ebx | |
376 shrl $1,%edx | |
377 andl $1,%ebx | |
378 .align 4,0x90 | |
# Cross products, j = i+1 .. num-1:
# t[j] = low(2*low(a[j]*a[i] + carry) + t[j] + ebx); the bit shifted out by
# the doubling plus both addition carries become the next ebx. The leal
# used for the doubling does not disturb CF ahead of the adcl.
379 L014sqradd: | |
380 movl (%esi,%ecx,4),%eax | |
381 movl %edx,%ebp | |
382 mull %edi | |
383 addl %ebp,%eax | |
384 leal (%eax,%eax,1),%ebp | |
385 adcl $0,%edx | |
386 shrl $31,%eax | |
387 addl 32(%esp,%ecx,4),%ebp | |
388 leal 1(%ecx),%ecx | |
389 adcl $0,%eax | |
390 addl %ebx,%ebp | |
391 adcl $0,%eax | |
392 cmpl (%esp),%ecx | |
393 movl %ebp,28(%esp,%ecx,4) | |
394 movl %eax,%ebx | |
395 jle L014sqradd | |
# Double the final carry word: ebp:edx = 2*edx + ebx.
396 movl %edx,%ebp | |
397 addl %edx,%edx | |
398 shrl $31,%ebp | |
399 addl %ebx,%edx | |
400 adcl $0,%ebp | |
# Row epilogue (ecx = num): add ebp:edx into t[num] and t[num+1], derive
# m = n0 * t[0], compute n[0]*m + t[0] for its carry only, restore
# ebx = num-1 and rejoin the reduction pass with eax = n[1], ecx = 1.
401 L013sqrlast: | |
402 movl 20(%esp),%edi | |
403 movl 16(%esp),%esi | |
404 imull 32(%esp),%edi | |
405 addl 32(%esp,%ecx,4),%edx | |
406 movl (%esi),%eax | |
407 adcl $0,%ebp | |
408 movl %edx,32(%esp,%ecx,4) | |
409 movl %ebp,36(%esp,%ecx,4) | |
410 mull %edi | |
411 addl 32(%esp),%eax | |
412 leal -1(%ecx),%ebx | |
413 adcl $0,%edx | |
414 movl $1,%ecx | |
415 movl 4(%esi),%eax | |
416 jmp L0123rdmadd | |
# Final conditional subtraction and function exit, shared by the SSE2, the
# integer multiply and the squaring paths.
# On entry ebx = num-1 and t[0..num] (starting at 32(%esp)) holds the
# result before the final subtraction. Here ebp = array at 16(%esp)
# (called n below), edi = destination array from 4(%esp), esi = t.
417 .align 4,0x90 | |
418 L006common_tail: | |
419 movl 16(%esp),%ebp | |
420 movl 4(%esp),%edi | |
421 leal 32(%esp),%esi | |
422 movl (%esi),%eax | |
423 movl %ebx,%ecx | |
# xorl also clears CF, so the first sbbl starts without a borrow.
424 xorl %edx,%edx | |
425 .align 4,0x90 | |
# dst[k] = t[k] - n[k] - borrow for k = 0 .. num-1. decl, movl and leal do
# not modify CF, so the borrow chain survives across iterations while jge
# loops as long as the decremented counter is still >= 0.
426 L015sub: | |
427 sbbl (%ebp,%edx,4),%eax | |
428 movl %eax,(%edi,%edx,4) | |
429 decl %ecx | |
430 movl 4(%esi,%edx,4),%eax | |
431 leal 1(%edx),%edx | |
432 jge L015sub | |
# eax = t[num] - final borrow, used below as an AND mask (expected to be 0
# or all ones - confirm against the input constraints). ecx is -1 here.
433 sbbl $0,%eax | |
434 .align 4,0x90 | |
# Branch-free select for k = num-1 down to 0:
# dst[k] = ((t[k] ^ dst[k]) & mask) ^ dst[k], which keeps t[k] when the mask
# is all ones and the subtracted value when it is zero. Each t[k] is then
# overwritten with ecx so the scratch vector no longer holds intermediate
# values.
435 L016copy: | |
436 movl (%esi,%ebx,4),%edx | |
437 movl (%edi,%ebx,4),%ebp | |
438 xorl %ebp,%edx | |
439 andl %eax,%edx | |
440 xorl %ebp,%edx | |
441 movl %ecx,(%esi,%ebx,4) | |
442 movl %edx,(%edi,%ebx,4) | |
443 decl %ebx | |
444 jge L016copy | |
# Drop the scratch frame by reloading the %esp saved at 24(%esp), and
# report success.
445 movl 24(%esp),%esp | |
446 movl $1,%eax | |
# Shared exit, also used by the early-out with eax = 0: restore the four
# registers pushed on entry and return.
447 L000just_leave: | |
448 popl %edi | |
449 popl %esi | |
450 popl %ebx | |
451 popl %ebp | |
452 ret | |
# NUL-terminated ASCII banner emitted right after the routine; the bytes spell
# "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>".
453 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 | |
454 .byte 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56 | |
455 .byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 | |
456 .byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 | |
457 .byte 111,114,103,62,0 | |
# Mach-O non-lazy symbol pointer section holding one 4-byte slot bound to
# _OPENSSL_ia32cap_P. The routine reads this slot relative to its own code
# address to locate the capability word. The slot is emitted as 0 here;
# presumably the dynamic linker stores the real address at load time -
# confirm.
458 .section __IMPORT,__pointers,non_lazy_symbol_pointers | |
459 L_OPENSSL_ia32cap_P$non_lazy_ptr: | |
460 .indirect_symbol _OPENSSL_ia32cap_P | |
461 .long 0 | |
# Closes the __i386__ preprocessor guard opened at the top of the file.
462 #endif | |
OLD | NEW |