OLD | NEW |
| (Empty) |
1 #if defined(__i386__) | |
2 .file "chacha-x86.S" | |
3 .text | |
/*
 * _ChaCha20_ctr32: entry point that picks between the scalar code and
 * the SSSE3 code. All five arguments are read from the stack; as the
 * code reached from here uses them:
 *   arg0 = destination buffer, arg1 = source buffer, arg2 = byte count,
 *   arg3 = pointer to 32 bytes of key material,
 *   arg4 = pointer to 16 bytes of counter/nonce material.
 * %ebp, %ebx, %esi and %edi are saved here and popped again before ret.
 */
4 .globl _ChaCha20_ctr32 | |
5 .private_extern _ChaCha20_ctr32 | |
6 .align 4 | |
7 _ChaCha20_ctr32: | |
8 L_ChaCha20_ctr32_begin: | |
9 pushl %ebp | |
10 pushl %ebx | |
11 pushl %esi | |
12 pushl %edi | |
/* After the four pushes arg2 (byte count) is at 28(%esp); nothing to do when it is zero. */
13 xorl %eax,%eax | |
14 cmpl 28(%esp),%eax | |
15 je L000no_data | |
/* call/pop pair: %eax = run-time address of Lpic_point, the base for position-independent references. */
16 call Lpic_point | |
17 Lpic_point: | |
18 popl %eax | |
/* %ebp = address of OPENSSL_ia32cap_P, fetched through its non-lazy pointer slot. */
19 movl L_OPENSSL_ia32cap_P$non_lazy_ptr-Lpic_point(%eax),%ebp | |
/*
 * Use the SSSE3 code only when bit 24 of capability word 0 and bit 9 of
 * capability word 1 are both set (presumably the CPUID FXSR and SSSE3
 * flags - confirm against the OPENSSL_ia32cap_P layout). Otherwise fall
 * into the scalar path. %eax still holds Lpic_point at the jump, and
 * the code at Lssse3_shortcut relies on that.
 */
20 testl $16777216,(%ebp) | |
21 jz L001x86 | |
22 testl $512,4(%ebp) | |
23 jz L001x86 | |
24 jmp Lssse3_shortcut | |
/*
 * L001x86: set-up for the scalar path. A 132-byte frame is reserved:
 *     0..60(%esp)   working state, sixteen 32-bit words
 *    80..108(%esp)  the 32 bytes read from arg3 (state words 4-11)
 *   112..124(%esp)  the 16 bytes read from arg4 (state words 12-15)
 *   128(%esp)       round-loop iteration counter
 * Once %esp has been lowered, arg0/arg1/arg2 are at 152/156/160(%esp).
 */
25 L001x86: | |
/* %esi = arg3 and %edi = arg4, read before the frame is allocated. */
26 movl 32(%esp),%esi | |
27 movl 36(%esp),%edi | |
28 subl $132,%esp | |
29 movl (%esi),%eax | |
30 movl 4(%esi),%ebx | |
31 movl 8(%esi),%ecx | |
32 movl 12(%esi),%edx | |
33 movl %eax,80(%esp) | |
34 movl %ebx,84(%esp) | |
35 movl %ecx,88(%esp) | |
36 movl %edx,92(%esp) | |
37 movl 16(%esi),%eax | |
38 movl 20(%esi),%ebx | |
39 movl 24(%esi),%ecx | |
40 movl 28(%esi),%edx | |
41 movl %eax,96(%esp) | |
42 movl %ebx,100(%esp) | |
43 movl %ecx,104(%esp) | |
44 movl %edx,108(%esp) | |
45 movl (%edi),%eax | |
46 movl 4(%edi),%ebx | |
47 movl 8(%edi),%ecx | |
48 movl 12(%edi),%edx | |
/* Word 12 is stored minus one because L002entry adds one to it before every block, the first included. */
49 subl $1,%eax | |
50 movl %eax,112(%esp) | |
51 movl %ebx,116(%esp) | |
52 movl %ecx,120(%esp) | |
53 movl %edx,124(%esp) | |
54 jmp L002entry | |
/*
 * L003outer_loop: reached after each full 64-byte block with
 * %eax = output pointer, %ebx = input pointer, %ecx = bytes remaining.
 * The three values are written back to the argument slots
 * (152/156/160(%esp)) so the registers can be reused.
 */
55 .align 4,0x90 | |
56 L003outer_loop: | |
57 movl %ebx,156(%esp) | |
58 movl %eax,152(%esp) | |
59 movl %ecx,160(%esp) | |
/*
 * L002entry: build the working state for one block. The four immediates
 * are the ASCII text "expand 32-byte k" taken as little-endian words;
 * word 0 stays in %eax, words 1-3 go to 4/8/12(%esp).
 */
60 L002entry: | |
61 movl $1634760805,%eax | |
62 movl $857760878,4(%esp) | |
63 movl $2036477234,8(%esp) | |
64 movl $1797285236,12(%esp) | |
/* Copy state words 5, 6, 10, 11, 13 and 14 from the saved input state into their working slots. */
65 movl 84(%esp),%ebx | |
66 movl 88(%esp),%ebp | |
67 movl 104(%esp),%ecx | |
68 movl 108(%esp),%esi | |
69 movl 116(%esp),%edx | |
70 movl 120(%esp),%edi | |
71 movl %ebx,20(%esp) | |
72 movl %ebp,24(%esp) | |
73 movl %ecx,40(%esp) | |
74 movl %esi,44(%esp) | |
75 movl %edx,52(%esp) | |
76 movl %edi,56(%esp) | |
/*
 * Words 7 and 15 go to their slots; words 4, 8, 9 and 12 are left in
 * %ebp, %ecx, %esi and %edx for the first quarter-round. Word 12 is
 * incremented and the new value saved back at 112(%esp).
 */
77 movl 92(%esp),%ebx | |
78 movl 124(%esp),%edi | |
79 movl 112(%esp),%edx | |
80 movl 80(%esp),%ebp | |
81 movl 96(%esp),%ecx | |
82 movl 100(%esp),%esi | |
83 addl $1,%edx | |
84 movl %ebx,28(%esp) | |
85 movl %edi,60(%esp) | |
86 movl %edx,112(%esp) | |
/* %ebx = number of iterations of L004loop. */
87 movl $10,%ebx | |
88 jmp L004loop | |
/*
 * L004loop: one iteration runs eight add/xor/rotate quarter-round
 * sequences (rotations by 16, 12, 8 and 7) over the working state at
 * 0..60(%esp). The iteration count is parked at 128(%esp) because %ebx
 * is needed as a temporary. Loads and stores belonging to neighbouring
 * quarter-rounds are interleaved, so the instruction order is
 * significant.
 */
89 .align 4,0x90 | |
90 L004loop: | |
91 addl %ebp,%eax | |
92 movl %ebx,128(%esp) | |
93 movl %ebp,%ebx | |
94 xorl %eax,%edx | |
95 roll $16,%edx | |
96 addl %edx,%ecx | |
97 xorl %ecx,%ebx | |
98 movl 52(%esp),%edi | |
99 roll $12,%ebx | |
100 movl 20(%esp),%ebp | |
101 addl %ebx,%eax | |
102 xorl %eax,%edx | |
103 movl %eax,(%esp) | |
104 roll $8,%edx | |
105 movl 4(%esp),%eax | |
106 addl %edx,%ecx | |
107 movl %edx,48(%esp) | |
108 xorl %ecx,%ebx | |
109 addl %ebp,%eax | |
110 roll $7,%ebx | |
111 xorl %eax,%edi | |
112 movl %ecx,32(%esp) | |
113 roll $16,%edi | |
114 movl %ebx,16(%esp) | |
115 addl %edi,%esi | |
116 movl 40(%esp),%ecx | |
117 xorl %esi,%ebp | |
118 movl 56(%esp),%edx | |
119 roll $12,%ebp | |
120 movl 24(%esp),%ebx | |
121 addl %ebp,%eax | |
122 xorl %eax,%edi | |
123 movl %eax,4(%esp) | |
124 roll $8,%edi | |
125 movl 8(%esp),%eax | |
126 addl %edi,%esi | |
127 movl %edi,52(%esp) | |
128 xorl %esi,%ebp | |
129 addl %ebx,%eax | |
130 roll $7,%ebp | |
131 xorl %eax,%edx | |
132 movl %esi,36(%esp) | |
133 roll $16,%edx | |
134 movl %ebp,20(%esp) | |
135 addl %edx,%ecx | |
136 movl 44(%esp),%esi | |
137 xorl %ecx,%ebx | |
138 movl 60(%esp),%edi | |
139 roll $12,%ebx | |
140 movl 28(%esp),%ebp | |
141 addl %ebx,%eax | |
142 xorl %eax,%edx | |
143 movl %eax,8(%esp) | |
144 roll $8,%edx | |
145 movl 12(%esp),%eax | |
146 addl %edx,%ecx | |
147 movl %edx,56(%esp) | |
148 xorl %ecx,%ebx | |
149 addl %ebp,%eax | |
150 roll $7,%ebx | |
151 xorl %eax,%edi | |
152 roll $16,%edi | |
153 movl %ebx,24(%esp) | |
154 addl %edi,%esi | |
155 xorl %esi,%ebp | |
156 roll $12,%ebp | |
157 movl 20(%esp),%ebx | |
158 addl %ebp,%eax | |
159 xorl %eax,%edi | |
160 movl %eax,12(%esp) | |
161 roll $8,%edi | |
162 movl (%esp),%eax | |
163 addl %edi,%esi | |
164 movl %edi,%edx | |
165 xorl %esi,%ebp | |
166 addl %ebx,%eax | |
167 roll $7,%ebp | |
168 xorl %eax,%edx | |
169 roll $16,%edx | |
170 movl %ebp,28(%esp) | |
171 addl %edx,%ecx | |
172 xorl %ecx,%ebx | |
173 movl 48(%esp),%edi | |
174 roll $12,%ebx | |
175 movl 24(%esp),%ebp | |
176 addl %ebx,%eax | |
177 xorl %eax,%edx | |
178 movl %eax,(%esp) | |
179 roll $8,%edx | |
180 movl 4(%esp),%eax | |
181 addl %edx,%ecx | |
182 movl %edx,60(%esp) | |
183 xorl %ecx,%ebx | |
184 addl %ebp,%eax | |
185 roll $7,%ebx | |
186 xorl %eax,%edi | |
187 movl %ecx,40(%esp) | |
188 roll $16,%edi | |
189 movl %ebx,20(%esp) | |
190 addl %edi,%esi | |
191 movl 32(%esp),%ecx | |
192 xorl %esi,%ebp | |
193 movl 52(%esp),%edx | |
194 roll $12,%ebp | |
195 movl 28(%esp),%ebx | |
196 addl %ebp,%eax | |
197 xorl %eax,%edi | |
198 movl %eax,4(%esp) | |
199 roll $8,%edi | |
200 movl 8(%esp),%eax | |
201 addl %edi,%esi | |
202 movl %edi,48(%esp) | |
203 xorl %esi,%ebp | |
204 addl %ebx,%eax | |
205 roll $7,%ebp | |
206 xorl %eax,%edx | |
207 movl %esi,44(%esp) | |
208 roll $16,%edx | |
209 movl %ebp,24(%esp) | |
210 addl %edx,%ecx | |
211 movl 36(%esp),%esi | |
212 xorl %ecx,%ebx | |
213 movl 56(%esp),%edi | |
214 roll $12,%ebx | |
215 movl 16(%esp),%ebp | |
216 addl %ebx,%eax | |
217 xorl %eax,%edx | |
218 movl %eax,8(%esp) | |
219 roll $8,%edx | |
220 movl 12(%esp),%eax | |
221 addl %edx,%ecx | |
222 movl %edx,52(%esp) | |
223 xorl %ecx,%ebx | |
224 addl %ebp,%eax | |
225 roll $7,%ebx | |
226 xorl %eax,%edi | |
227 roll $16,%edi | |
228 movl %ebx,28(%esp) | |
229 addl %edi,%esi | |
230 xorl %esi,%ebp | |
231 movl 48(%esp),%edx | |
232 roll $12,%ebp | |
/* Bring the iteration counter back into %ebx for the decl/jnz below. */
233 movl 128(%esp),%ebx | |
234 addl %ebp,%eax | |
235 xorl %eax,%edi | |
236 movl %eax,12(%esp) | |
237 roll $8,%edi | |
238 movl (%esp),%eax | |
239 addl %edi,%esi | |
240 movl %edi,56(%esp) | |
241 xorl %esi,%ebp | |
242 roll $7,%ebp | |
243 decl %ebx | |
244 jnz L004loop | |
/*
 * All iterations done. %ebx = bytes remaining. Add the saved input
 * state to the words still held in registers (0, 4, 8, 9) and take the
 * tail path when fewer than 64 bytes are left.
 */
245 movl 160(%esp),%ebx | |
246 addl $1634760805,%eax | |
247 addl 80(%esp),%ebp | |
248 addl 96(%esp),%ecx | |
249 addl 100(%esp),%esi | |
250 cmpl $64,%ebx | |
251 jb L005tail | |
/*
 * Full block: %ebx = input pointer. Each keystream word is XORed with
 * the matching input word and stored through %eax (output pointer).
 * Word 0 is parked at (%esp) while %eax is reloaded and is written out
 * last.
 */
252 movl 156(%esp),%ebx | |
253 addl 112(%esp),%edx | |
254 addl 120(%esp),%edi | |
255 xorl (%ebx),%eax | |
256 xorl 16(%ebx),%ebp | |
257 movl %eax,(%esp) | |
258 movl 152(%esp),%eax | |
259 xorl 32(%ebx),%ecx | |
260 xorl 36(%ebx),%esi | |
261 xorl 48(%ebx),%edx | |
262 xorl 56(%ebx),%edi | |
263 movl %ebp,16(%eax) | |
264 movl %ecx,32(%eax) | |
265 movl %esi,36(%eax) | |
266 movl %edx,48(%eax) | |
267 movl %edi,56(%eax) | |
/* Words 1, 2, 3, 5 and 6: reload from the working state, add constants / saved input state, XOR, store. */
268 movl 4(%esp),%ebp | |
269 movl 8(%esp),%ecx | |
270 movl 12(%esp),%esi | |
271 movl 20(%esp),%edx | |
272 movl 24(%esp),%edi | |
273 addl $857760878,%ebp | |
274 addl $2036477234,%ecx | |
275 addl $1797285236,%esi | |
276 addl 84(%esp),%edx | |
277 addl 88(%esp),%edi | |
278 xorl 4(%ebx),%ebp | |
279 xorl 8(%ebx),%ecx | |
280 xorl 12(%ebx),%esi | |
281 xorl 20(%ebx),%edx | |
282 xorl 24(%ebx),%edi | |
283 movl %ebp,4(%eax) | |
284 movl %ecx,8(%eax) | |
285 movl %esi,12(%eax) | |
286 movl %edx,20(%eax) | |
287 movl %edi,24(%eax) | |
/* Words 7, 10, 11, 13 and 15, handled the same way. */
288 movl 28(%esp),%ebp | |
289 movl 40(%esp),%ecx | |
290 movl 44(%esp),%esi | |
291 movl 52(%esp),%edx | |
292 movl 60(%esp),%edi | |
293 addl 92(%esp),%ebp | |
294 addl 104(%esp),%ecx | |
295 addl 108(%esp),%esi | |
296 addl 116(%esp),%edx | |
297 addl 124(%esp),%edi | |
298 xorl 28(%ebx),%ebp | |
299 xorl 40(%ebx),%ecx | |
300 xorl 44(%ebx),%esi | |
301 xorl 52(%ebx),%edx | |
302 xorl 60(%ebx),%edi | |
/* Advance both pointers by 64 bytes, reload the remaining byte count into %ecx, and go round again while bytes remain. */
303 leal 64(%ebx),%ebx | |
304 movl %ebp,28(%eax) | |
305 movl (%esp),%ebp | |
306 movl %ecx,40(%eax) | |
307 movl 160(%esp),%ecx | |
308 movl %esi,44(%eax) | |
309 movl %edx,52(%eax) | |
310 movl %edi,60(%eax) | |
311 movl %ebp,(%eax) | |
312 leal 64(%eax),%eax | |
313 subl $64,%ecx | |
314 jnz L003outer_loop | |
315 jmp L006done | |
/*
 * L005tail: fewer than 64 bytes remain (%ebx = count, non-zero). The
 * additions of the saved input state are completed for every word and
 * the whole 64-byte keystream block is written to 0..60(%esp); it is
 * then XORed into the output one byte at a time.
 */
316 L005tail: | |
317 addl 112(%esp),%edx | |
318 addl 120(%esp),%edi | |
319 movl %eax,(%esp) | |
320 movl %ebp,16(%esp) | |
321 movl %ecx,32(%esp) | |
322 movl %esi,36(%esp) | |
323 movl %edx,48(%esp) | |
324 movl %edi,56(%esp) | |
325 movl 4(%esp),%ebp | |
326 movl 8(%esp),%ecx | |
327 movl 12(%esp),%esi | |
328 movl 20(%esp),%edx | |
329 movl 24(%esp),%edi | |
330 addl $857760878,%ebp | |
331 addl $2036477234,%ecx | |
332 addl $1797285236,%esi | |
333 addl 84(%esp),%edx | |
334 addl 88(%esp),%edi | |
335 movl %ebp,4(%esp) | |
336 movl %ecx,8(%esp) | |
337 movl %esi,12(%esp) | |
338 movl %edx,20(%esp) | |
339 movl %edi,24(%esp) | |
340 movl 28(%esp),%ebp | |
341 movl 40(%esp),%ecx | |
342 movl 44(%esp),%esi | |
343 movl 52(%esp),%edx | |
344 movl 60(%esp),%edi | |
345 addl 92(%esp),%ebp | |
346 addl 104(%esp),%ecx | |
347 addl 108(%esp),%esi | |
348 addl 116(%esp),%edx | |
349 addl 124(%esp),%edi | |
/* From here on: %ebp = input pointer, %ecx = output pointer, %esi = byte index (starts at 0). */
350 movl %ebp,28(%esp) | |
351 movl 156(%esp),%ebp | |
352 movl %ecx,40(%esp) | |
353 movl 152(%esp),%ecx | |
354 movl %esi,44(%esp) | |
355 xorl %esi,%esi | |
356 movl %edx,52(%esp) | |
357 movl %edi,60(%esp) | |
358 xorl %eax,%eax | |
359 xorl %edx,%edx | |
/*
 * Byte loop: out[i] = in[i] XOR keystream[i]. The index is incremented
 * before the store, hence the -1 displacement on the store address.
 * %ebx counts the remaining bytes down to zero.
 */
360 L007tail_loop: | |
361 movb (%esi,%ebp,1),%al | |
362 movb (%esp,%esi,1),%dl | |
363 leal 1(%esi),%esi | |
364 xorb %dl,%al | |
365 movb %al,-1(%ecx,%esi,1) | |
366 decl %ebx | |
367 jnz L007tail_loop | |
/*
 * L006done: release the 132-byte scalar frame. L000no_data is the
 * zero-length exit, taken before that frame was allocated, so it joins
 * after the addl. The four saved registers are then restored in reverse
 * push order.
 */
368 L006done: | |
369 addl $132,%esp | |
370 L000no_data: | |
371 popl %edi | |
372 popl %esi | |
373 popl %ebx | |
374 popl %ebp | |
375 ret | |
/*
 * _ChaCha20_ssse3: SSE2/SSSE3 implementation. Stack arguments as read
 * at Lssse3_shortcut:
 *   arg0 -> %edi (destination), arg1 -> %esi (source),
 *   arg2 -> %ecx (byte count), arg3 -> %edx (pointer to 32 key bytes),
 *   arg4 -> %ebx (pointer to 16 counter/nonce bytes).
 * Inputs of 256 bytes or more are processed four blocks at a time;
 * anything shorter, and whatever is left over, one block at a time.
 *
 * NOTE(review): the code after Lssse3_shortcut forms the address of
 * Lssse3_data from %eax, expecting %eax = address of Lpic_point. Only
 * the jump from _ChaCha20_ctr32 provides that value; nothing on the
 * direct entry path through this label sets %eax. Calling this symbol
 * directly therefore looks unsafe - confirm that no caller does.
 */
376 .globl _ChaCha20_ssse3 | |
377 .private_extern _ChaCha20_ssse3 | |
378 .align 4 | |
379 _ChaCha20_ssse3: | |
380 L_ChaCha20_ssse3_begin: | |
381 pushl %ebp | |
382 pushl %ebx | |
383 pushl %esi | |
384 pushl %edi | |
385 Lssse3_shortcut: | |
386 movl 20(%esp),%edi | |
387 movl 24(%esp),%esi | |
388 movl 28(%esp),%ecx | |
389 movl 32(%esp),%edx | |
390 movl 36(%esp),%ebx | |
/*
 * Carve out a 64-byte-aligned frame below the current stack pointer.
 * The original %esp is kept at 512(%esp) and is the only way back to
 * the saved registers.
 */
391 movl %esp,%ebp | |
392 subl $524,%esp | |
393 andl $-64,%esp | |
394 movl %ebp,512(%esp) | |
/* %eax = address of the constant table Lssse3_data (see the review note in the header). */
395 leal Lssse3_data-Lpic_point(%eax),%eax | |
/* xmm3 = the 16 counter/nonce bytes. Inputs shorter than 256 bytes go straight to the one-block path. */
396 movdqu (%ebx),%xmm3 | |
397 cmpl $256,%ecx | |
398 jb L0081x | |
/*
 * Four-block set-up. The key and counter pointers are saved at
 * 516/520(%esp) for later. The byte count is biased by -256 so the
 * loop-back test can use the borrow flag. With %ebp = 384(%esp), each
 * of the sixteen state words is broadcast into all four lanes of its
 * own 16-byte slot at -128..112(%ebp), one lane per block.
 */
399 movl %edx,516(%esp) | |
400 movl %ebx,520(%esp) | |
401 subl $256,%ecx | |
402 leal 384(%esp),%ebp | |
403 movdqu (%edx),%xmm7 | |
404 pshufd $0,%xmm3,%xmm0 | |
405 pshufd $85,%xmm3,%xmm1 | |
406 pshufd $170,%xmm3,%xmm2 | |
407 pshufd $255,%xmm3,%xmm3 | |
/*
 * Word 12 gets lane offsets {0,1,2,3} so each lane works on its own
 * block, and {4,4,4,4} is subtracted up front because L009outer_loop
 * adds it back before every batch of four blocks.
 */
408 paddd 48(%eax),%xmm0 | |
409 pshufd $0,%xmm7,%xmm4 | |
410 pshufd $85,%xmm7,%xmm5 | |
411 psubd 64(%eax),%xmm0 | |
412 pshufd $170,%xmm7,%xmm6 | |
413 pshufd $255,%xmm7,%xmm7 | |
414 movdqa %xmm0,64(%ebp) | |
415 movdqa %xmm1,80(%ebp) | |
416 movdqa %xmm2,96(%ebp) | |
417 movdqa %xmm3,112(%ebp) | |
418 movdqu 16(%edx),%xmm3 | |
419 movdqa %xmm4,-64(%ebp) | |
420 movdqa %xmm5,-48(%ebp) | |
421 movdqa %xmm6,-32(%ebp) | |
422 movdqa %xmm7,-16(%ebp) | |
423 movdqa 32(%eax),%xmm7 | |
/* %ebx = 128(%esp): base of the working copy that the round loop updates. */
424 leal 128(%esp),%ebx | |
425 pshufd $0,%xmm3,%xmm0 | |
426 pshufd $85,%xmm3,%xmm1 | |
427 pshufd $170,%xmm3,%xmm2 | |
428 pshufd $255,%xmm3,%xmm3 | |
429 pshufd $0,%xmm7,%xmm4 | |
430 pshufd $85,%xmm7,%xmm5 | |
431 pshufd $170,%xmm7,%xmm6 | |
432 pshufd $255,%xmm7,%xmm7 | |
433 movdqa %xmm0,(%ebp) | |
434 movdqa %xmm1,16(%ebp) | |
435 movdqa %xmm2,32(%ebp) | |
436 movdqa %xmm3,48(%ebp) | |
437 movdqa %xmm4,-128(%ebp) | |
438 movdqa %xmm5,-112(%ebp) | |
439 movdqa %xmm6,-96(%ebp) | |
440 movdqa %xmm7,-80(%ebp) | |
/* Both data pointers are biased by +128; the four-block output code addresses them at -128/-64/0/64. */
441 leal 128(%esi),%esi | |
442 leal 128(%edi),%edi | |
443 jmp L009outer_loop | |
/*
 * L009outer_loop: start of one 256-byte batch (four blocks). The
 * broadcast state at %ebp is copied into the working area at %ebx.
 * Words 0, 4, 8, 9 and 12 are not stored to the working area here;
 * they are left in xmm0, xmm3, xmm4, xmm5 and xmm6 for the first
 * quarter-round.
 */
444 .align 4,0x90 | |
445 L009outer_loop: | |
446 movdqa -112(%ebp),%xmm1 | |
447 movdqa -96(%ebp),%xmm2 | |
448 movdqa -80(%ebp),%xmm3 | |
449 movdqa -48(%ebp),%xmm5 | |
450 movdqa -32(%ebp),%xmm6 | |
451 movdqa -16(%ebp),%xmm7 | |
452 movdqa %xmm1,-112(%ebx) | |
453 movdqa %xmm2,-96(%ebx) | |
454 movdqa %xmm3,-80(%ebx) | |
455 movdqa %xmm5,-48(%ebx) | |
456 movdqa %xmm6,-32(%ebx) | |
457 movdqa %xmm7,-16(%ebx) | |
458 movdqa 32(%ebp),%xmm2 | |
459 movdqa 48(%ebp),%xmm3 | |
460 movdqa 64(%ebp),%xmm4 | |
461 movdqa 80(%ebp),%xmm5 | |
462 movdqa 96(%ebp),%xmm6 | |
463 movdqa 112(%ebp),%xmm7 | |
/* Advance word 12 by {4,4,4,4}; the new value is also written back to 64(%ebp) below. */
464 paddd 64(%eax),%xmm4 | |
465 movdqa %xmm2,32(%ebx) | |
466 movdqa %xmm3,48(%ebx) | |
467 movdqa %xmm4,64(%ebx) | |
468 movdqa %xmm5,80(%ebx) | |
469 movdqa %xmm6,96(%ebx) | |
470 movdqa %xmm7,112(%ebx) | |
471 movdqa %xmm4,64(%ebp) | |
472 movdqa -128(%ebp),%xmm0 | |
473 movdqa %xmm4,%xmm6 | |
474 movdqa -64(%ebp),%xmm3 | |
475 movdqa (%ebp),%xmm4 | |
476 movdqa 16(%ebp),%xmm5 | |
/* %edx = number of iterations of L010loop. */
477 movl $10,%edx | |
478 nop | |
/*
 * L010loop: one iteration runs eight quarter-round sequences on four
 * blocks at once (one block per 32-bit lane). Rotations by 16 and 8
 * are done with pshufb using the byte-shuffle masks at 0(%eax) and
 * 16(%eax); rotations by 12 and 7 use a pslld/psrld/por triple. State
 * words that do not fit in xmm0-xmm7 are spilled to and reloaded from
 * the working area at %ebx, interleaved with the arithmetic, so the
 * instruction order is significant.
 */
479 .align 4,0x90 | |
480 L010loop: | |
481 paddd %xmm3,%xmm0 | |
482 movdqa %xmm3,%xmm2 | |
483 pxor %xmm0,%xmm6 | |
484 pshufb (%eax),%xmm6 | |
485 paddd %xmm6,%xmm4 | |
486 pxor %xmm4,%xmm2 | |
487 movdqa -48(%ebx),%xmm3 | |
488 movdqa %xmm2,%xmm1 | |
489 pslld $12,%xmm2 | |
490 psrld $20,%xmm1 | |
491 por %xmm1,%xmm2 | |
492 movdqa -112(%ebx),%xmm1 | |
493 paddd %xmm2,%xmm0 | |
494 movdqa 80(%ebx),%xmm7 | |
495 pxor %xmm0,%xmm6 | |
496 movdqa %xmm0,-128(%ebx) | |
497 pshufb 16(%eax),%xmm6 | |
498 paddd %xmm6,%xmm4 | |
499 movdqa %xmm6,64(%ebx) | |
500 pxor %xmm4,%xmm2 | |
501 paddd %xmm3,%xmm1 | |
502 movdqa %xmm2,%xmm0 | |
503 pslld $7,%xmm2 | |
504 psrld $25,%xmm0 | |
505 pxor %xmm1,%xmm7 | |
506 por %xmm0,%xmm2 | |
507 movdqa %xmm4,(%ebx) | |
508 pshufb (%eax),%xmm7 | |
509 movdqa %xmm2,-64(%ebx) | |
510 paddd %xmm7,%xmm5 | |
511 movdqa 32(%ebx),%xmm4 | |
512 pxor %xmm5,%xmm3 | |
513 movdqa -32(%ebx),%xmm2 | |
514 movdqa %xmm3,%xmm0 | |
515 pslld $12,%xmm3 | |
516 psrld $20,%xmm0 | |
517 por %xmm0,%xmm3 | |
518 movdqa -96(%ebx),%xmm0 | |
519 paddd %xmm3,%xmm1 | |
520 movdqa 96(%ebx),%xmm6 | |
521 pxor %xmm1,%xmm7 | |
522 movdqa %xmm1,-112(%ebx) | |
523 pshufb 16(%eax),%xmm7 | |
524 paddd %xmm7,%xmm5 | |
525 movdqa %xmm7,80(%ebx) | |
526 pxor %xmm5,%xmm3 | |
527 paddd %xmm2,%xmm0 | |
528 movdqa %xmm3,%xmm1 | |
529 pslld $7,%xmm3 | |
530 psrld $25,%xmm1 | |
531 pxor %xmm0,%xmm6 | |
532 por %xmm1,%xmm3 | |
533 movdqa %xmm5,16(%ebx) | |
534 pshufb (%eax),%xmm6 | |
535 movdqa %xmm3,-48(%ebx) | |
536 paddd %xmm6,%xmm4 | |
537 movdqa 48(%ebx),%xmm5 | |
538 pxor %xmm4,%xmm2 | |
539 movdqa -16(%ebx),%xmm3 | |
540 movdqa %xmm2,%xmm1 | |
541 pslld $12,%xmm2 | |
542 psrld $20,%xmm1 | |
543 por %xmm1,%xmm2 | |
544 movdqa -80(%ebx),%xmm1 | |
545 paddd %xmm2,%xmm0 | |
546 movdqa 112(%ebx),%xmm7 | |
547 pxor %xmm0,%xmm6 | |
548 movdqa %xmm0,-96(%ebx) | |
549 pshufb 16(%eax),%xmm6 | |
550 paddd %xmm6,%xmm4 | |
551 movdqa %xmm6,96(%ebx) | |
552 pxor %xmm4,%xmm2 | |
553 paddd %xmm3,%xmm1 | |
554 movdqa %xmm2,%xmm0 | |
555 pslld $7,%xmm2 | |
556 psrld $25,%xmm0 | |
557 pxor %xmm1,%xmm7 | |
558 por %xmm0,%xmm2 | |
559 pshufb (%eax),%xmm7 | |
560 movdqa %xmm2,-32(%ebx) | |
561 paddd %xmm7,%xmm5 | |
562 pxor %xmm5,%xmm3 | |
563 movdqa -48(%ebx),%xmm2 | |
564 movdqa %xmm3,%xmm0 | |
565 pslld $12,%xmm3 | |
566 psrld $20,%xmm0 | |
567 por %xmm0,%xmm3 | |
568 movdqa -128(%ebx),%xmm0 | |
569 paddd %xmm3,%xmm1 | |
570 pxor %xmm1,%xmm7 | |
571 movdqa %xmm1,-80(%ebx) | |
572 pshufb 16(%eax),%xmm7 | |
573 paddd %xmm7,%xmm5 | |
574 movdqa %xmm7,%xmm6 | |
575 pxor %xmm5,%xmm3 | |
576 paddd %xmm2,%xmm0 | |
577 movdqa %xmm3,%xmm1 | |
578 pslld $7,%xmm3 | |
579 psrld $25,%xmm1 | |
580 pxor %xmm0,%xmm6 | |
581 por %xmm1,%xmm3 | |
582 pshufb (%eax),%xmm6 | |
583 movdqa %xmm3,-16(%ebx) | |
584 paddd %xmm6,%xmm4 | |
585 pxor %xmm4,%xmm2 | |
586 movdqa -32(%ebx),%xmm3 | |
587 movdqa %xmm2,%xmm1 | |
588 pslld $12,%xmm2 | |
589 psrld $20,%xmm1 | |
590 por %xmm1,%xmm2 | |
591 movdqa -112(%ebx),%xmm1 | |
592 paddd %xmm2,%xmm0 | |
593 movdqa 64(%ebx),%xmm7 | |
594 pxor %xmm0,%xmm6 | |
595 movdqa %xmm0,-128(%ebx) | |
596 pshufb 16(%eax),%xmm6 | |
597 paddd %xmm6,%xmm4 | |
598 movdqa %xmm6,112(%ebx) | |
599 pxor %xmm4,%xmm2 | |
600 paddd %xmm3,%xmm1 | |
601 movdqa %xmm2,%xmm0 | |
602 pslld $7,%xmm2 | |
603 psrld $25,%xmm0 | |
604 pxor %xmm1,%xmm7 | |
605 por %xmm0,%xmm2 | |
606 movdqa %xmm4,32(%ebx) | |
607 pshufb (%eax),%xmm7 | |
608 movdqa %xmm2,-48(%ebx) | |
609 paddd %xmm7,%xmm5 | |
610 movdqa (%ebx),%xmm4 | |
611 pxor %xmm5,%xmm3 | |
612 movdqa -16(%ebx),%xmm2 | |
613 movdqa %xmm3,%xmm0 | |
614 pslld $12,%xmm3 | |
615 psrld $20,%xmm0 | |
616 por %xmm0,%xmm3 | |
617 movdqa -96(%ebx),%xmm0 | |
618 paddd %xmm3,%xmm1 | |
619 movdqa 80(%ebx),%xmm6 | |
620 pxor %xmm1,%xmm7 | |
621 movdqa %xmm1,-112(%ebx) | |
622 pshufb 16(%eax),%xmm7 | |
623 paddd %xmm7,%xmm5 | |
624 movdqa %xmm7,64(%ebx) | |
625 pxor %xmm5,%xmm3 | |
626 paddd %xmm2,%xmm0 | |
627 movdqa %xmm3,%xmm1 | |
628 pslld $7,%xmm3 | |
629 psrld $25,%xmm1 | |
630 pxor %xmm0,%xmm6 | |
631 por %xmm1,%xmm3 | |
632 movdqa %xmm5,48(%ebx) | |
633 pshufb (%eax),%xmm6 | |
634 movdqa %xmm3,-32(%ebx) | |
635 paddd %xmm6,%xmm4 | |
636 movdqa 16(%ebx),%xmm5 | |
637 pxor %xmm4,%xmm2 | |
638 movdqa -64(%ebx),%xmm3 | |
639 movdqa %xmm2,%xmm1 | |
640 pslld $12,%xmm2 | |
641 psrld $20,%xmm1 | |
642 por %xmm1,%xmm2 | |
643 movdqa -80(%ebx),%xmm1 | |
644 paddd %xmm2,%xmm0 | |
645 movdqa 96(%ebx),%xmm7 | |
646 pxor %xmm0,%xmm6 | |
647 movdqa %xmm0,-96(%ebx) | |
648 pshufb 16(%eax),%xmm6 | |
649 paddd %xmm6,%xmm4 | |
650 movdqa %xmm6,80(%ebx) | |
651 pxor %xmm4,%xmm2 | |
652 paddd %xmm3,%xmm1 | |
653 movdqa %xmm2,%xmm0 | |
654 pslld $7,%xmm2 | |
655 psrld $25,%xmm0 | |
656 pxor %xmm1,%xmm7 | |
657 por %xmm0,%xmm2 | |
658 pshufb (%eax),%xmm7 | |
659 movdqa %xmm2,-16(%ebx) | |
660 paddd %xmm7,%xmm5 | |
661 pxor %xmm5,%xmm3 | |
662 movdqa %xmm3,%xmm0 | |
663 pslld $12,%xmm3 | |
664 psrld $20,%xmm0 | |
665 por %xmm0,%xmm3 | |
666 movdqa -128(%ebx),%xmm0 | |
667 paddd %xmm3,%xmm1 | |
668 movdqa 64(%ebx),%xmm6 | |
669 pxor %xmm1,%xmm7 | |
670 movdqa %xmm1,-80(%ebx) | |
671 pshufb 16(%eax),%xmm7 | |
672 paddd %xmm7,%xmm5 | |
673 movdqa %xmm7,96(%ebx) | |
674 pxor %xmm5,%xmm3 | |
675 movdqa %xmm3,%xmm1 | |
676 pslld $7,%xmm3 | |
677 psrld $25,%xmm1 | |
678 por %xmm1,%xmm3 | |
679 decl %edx | |
680 jnz L010loop | |
/*
 * Iterations done. Flush the words still in registers to the working
 * area. Then, for each group of four state words (0-3, 4-7, 8-11,
 * 12-15): add the saved input state from the %ebp area, transpose the
 * 4x4 dword matrix with punpck{l,h}dq / punpck{l,h}qdq so that each
 * register holds 16 consecutive keystream bytes of one block, XOR with
 * the input at 64-byte strides and store to the output.
 */
681 movdqa %xmm3,-64(%ebx) | |
682 movdqa %xmm4,(%ebx) | |
683 movdqa %xmm5,16(%ebx) | |
684 movdqa %xmm6,64(%ebx) | |
685 movdqa %xmm7,96(%ebx) | |
/* Group 1: state words 0-3 (bytes 0..15 of each of the four blocks). */
686 movdqa -112(%ebx),%xmm1 | |
687 movdqa -96(%ebx),%xmm2 | |
688 movdqa -80(%ebx),%xmm3 | |
689 paddd -128(%ebp),%xmm0 | |
690 paddd -112(%ebp),%xmm1 | |
691 paddd -96(%ebp),%xmm2 | |
692 paddd -80(%ebp),%xmm3 | |
693 movdqa %xmm0,%xmm6 | |
694 punpckldq %xmm1,%xmm0 | |
695 movdqa %xmm2,%xmm7 | |
696 punpckldq %xmm3,%xmm2 | |
697 punpckhdq %xmm1,%xmm6 | |
698 punpckhdq %xmm3,%xmm7 | |
699 movdqa %xmm0,%xmm1 | |
700 punpcklqdq %xmm2,%xmm0 | |
701 movdqa %xmm6,%xmm3 | |
702 punpcklqdq %xmm7,%xmm6 | |
703 punpckhqdq %xmm2,%xmm1 | |
704 punpckhqdq %xmm7,%xmm3 | |
705 movdqu -128(%esi),%xmm4 | |
706 movdqu -64(%esi),%xmm5 | |
707 movdqu (%esi),%xmm2 | |
708 movdqu 64(%esi),%xmm7 | |
709 leal 16(%esi),%esi | |
710 pxor %xmm0,%xmm4 | |
711 movdqa -64(%ebx),%xmm0 | |
712 pxor %xmm1,%xmm5 | |
713 movdqa -48(%ebx),%xmm1 | |
714 pxor %xmm2,%xmm6 | |
715 movdqa -32(%ebx),%xmm2 | |
716 pxor %xmm3,%xmm7 | |
717 movdqa -16(%ebx),%xmm3 | |
718 movdqu %xmm4,-128(%edi) | |
719 movdqu %xmm5,-64(%edi) | |
720 movdqu %xmm6,(%edi) | |
721 movdqu %xmm7,64(%edi) | |
722 leal 16(%edi),%edi | |
/* Group 2: state words 4-7. */
723 paddd -64(%ebp),%xmm0 | |
724 paddd -48(%ebp),%xmm1 | |
725 paddd -32(%ebp),%xmm2 | |
726 paddd -16(%ebp),%xmm3 | |
727 movdqa %xmm0,%xmm6 | |
728 punpckldq %xmm1,%xmm0 | |
729 movdqa %xmm2,%xmm7 | |
730 punpckldq %xmm3,%xmm2 | |
731 punpckhdq %xmm1,%xmm6 | |
732 punpckhdq %xmm3,%xmm7 | |
733 movdqa %xmm0,%xmm1 | |
734 punpcklqdq %xmm2,%xmm0 | |
735 movdqa %xmm6,%xmm3 | |
736 punpcklqdq %xmm7,%xmm6 | |
737 punpckhqdq %xmm2,%xmm1 | |
738 punpckhqdq %xmm7,%xmm3 | |
739 movdqu -128(%esi),%xmm4 | |
740 movdqu -64(%esi),%xmm5 | |
741 movdqu (%esi),%xmm2 | |
742 movdqu 64(%esi),%xmm7 | |
743 leal 16(%esi),%esi | |
744 pxor %xmm0,%xmm4 | |
745 movdqa (%ebx),%xmm0 | |
746 pxor %xmm1,%xmm5 | |
747 movdqa 16(%ebx),%xmm1 | |
748 pxor %xmm2,%xmm6 | |
749 movdqa 32(%ebx),%xmm2 | |
750 pxor %xmm3,%xmm7 | |
751 movdqa 48(%ebx),%xmm3 | |
752 movdqu %xmm4,-128(%edi) | |
753 movdqu %xmm5,-64(%edi) | |
754 movdqu %xmm6,(%edi) | |
755 movdqu %xmm7,64(%edi) | |
756 leal 16(%edi),%edi | |
/* Group 3: state words 8-11. */
757 paddd (%ebp),%xmm0 | |
758 paddd 16(%ebp),%xmm1 | |
759 paddd 32(%ebp),%xmm2 | |
760 paddd 48(%ebp),%xmm3 | |
761 movdqa %xmm0,%xmm6 | |
762 punpckldq %xmm1,%xmm0 | |
763 movdqa %xmm2,%xmm7 | |
764 punpckldq %xmm3,%xmm2 | |
765 punpckhdq %xmm1,%xmm6 | |
766 punpckhdq %xmm3,%xmm7 | |
767 movdqa %xmm0,%xmm1 | |
768 punpcklqdq %xmm2,%xmm0 | |
769 movdqa %xmm6,%xmm3 | |
770 punpcklqdq %xmm7,%xmm6 | |
771 punpckhqdq %xmm2,%xmm1 | |
772 punpckhqdq %xmm7,%xmm3 | |
773 movdqu -128(%esi),%xmm4 | |
774 movdqu -64(%esi),%xmm5 | |
775 movdqu (%esi),%xmm2 | |
776 movdqu 64(%esi),%xmm7 | |
777 leal 16(%esi),%esi | |
778 pxor %xmm0,%xmm4 | |
779 movdqa 64(%ebx),%xmm0 | |
780 pxor %xmm1,%xmm5 | |
781 movdqa 80(%ebx),%xmm1 | |
782 pxor %xmm2,%xmm6 | |
783 movdqa 96(%ebx),%xmm2 | |
784 pxor %xmm3,%xmm7 | |
785 movdqa 112(%ebx),%xmm3 | |
786 movdqu %xmm4,-128(%edi) | |
787 movdqu %xmm5,-64(%edi) | |
788 movdqu %xmm6,(%edi) | |
789 movdqu %xmm7,64(%edi) | |
790 leal 16(%edi),%edi | |
/* Group 4: state words 12-15. */
791 paddd 64(%ebp),%xmm0 | |
792 paddd 80(%ebp),%xmm1 | |
793 paddd 96(%ebp),%xmm2 | |
794 paddd 112(%ebp),%xmm3 | |
795 movdqa %xmm0,%xmm6 | |
796 punpckldq %xmm1,%xmm0 | |
797 movdqa %xmm2,%xmm7 | |
798 punpckldq %xmm3,%xmm2 | |
799 punpckhdq %xmm1,%xmm6 | |
800 punpckhdq %xmm3,%xmm7 | |
801 movdqa %xmm0,%xmm1 | |
802 punpcklqdq %xmm2,%xmm0 | |
803 movdqa %xmm6,%xmm3 | |
804 punpcklqdq %xmm7,%xmm6 | |
805 punpckhqdq %xmm2,%xmm1 | |
806 punpckhqdq %xmm7,%xmm3 | |
807 movdqu -128(%esi),%xmm4 | |
808 movdqu -64(%esi),%xmm5 | |
809 movdqu (%esi),%xmm2 | |
810 movdqu 64(%esi),%xmm7 | |
/* The pointers have already moved 3 x 16 bytes; 208 more completes the 256-byte advance. */
811 leal 208(%esi),%esi | |
812 pxor %xmm0,%xmm4 | |
813 pxor %xmm1,%xmm5 | |
814 pxor %xmm2,%xmm6 | |
815 pxor %xmm3,%xmm7 | |
816 movdqu %xmm4,-128(%edi) | |
817 movdqu %xmm5,-64(%edi) | |
818 movdqu %xmm6,(%edi) | |
819 movdqu %xmm7,64(%edi) | |
820 leal 208(%edi),%edi | |
/*
 * %ecx carries a -256 bias, so "no borrow" means at least another 256
 * bytes remain. Otherwise remove the bias; zero means finished.
 */
821 subl $256,%ecx | |
822 jnc L009outer_loop | |
823 addl $256,%ecx | |
824 jz L011done | |
/*
 * 1..255 bytes left: hand over to the one-block path. Reload the
 * counter pointer (%ebx) and key pointer (%edx), remove the +128 bias
 * from the data pointers, and rebuild the counter block in xmm3:
 * word 0 = lane-0 counter of the last batch + 4, words 1-3 = original
 * values (kept by the {0,-1,-1,-1} mask at 112(%eax)).
 */
825 movl 520(%esp),%ebx | |
826 leal -128(%esi),%esi | |
827 movl 516(%esp),%edx | |
828 leal -128(%edi),%edi | |
829 movd 64(%ebp),%xmm2 | |
830 movdqu (%ebx),%xmm3 | |
831 paddd 96(%eax),%xmm2 | |
832 pand 112(%eax),%xmm3 | |
833 por %xmm2,%xmm3 | |
/*
 * L0081x: set-up for the one-block-at-a-time path. On arrival xmm3
 * holds the counter/nonce row and %edx the key pointer. Register use
 * from here on:
 *   xmm0 = constants row, xmm1/xmm2 = the two key rows, xmm3 = counter row,
 *   xmm6/xmm7 = byte-shuffle masks for the 16-bit and 8-bit rotations.
 * The input state is also kept at 0..63(%esp) to be added back after
 * the rounds.
 */
834 L0081x: | |
835 movdqa 32(%eax),%xmm0 | |
836 movdqu (%edx),%xmm1 | |
837 movdqu 16(%edx),%xmm2 | |
838 movdqa (%eax),%xmm6 | |
839 movdqa 16(%eax),%xmm7 | |
/*
 * NOTE(review): the dword written to 48(%esp) here is overwritten by
 * the movdqa to the same address four instructions below, so this
 * store appears to have no effect.
 */
840 movl %ebp,48(%esp) | |
841 movdqa %xmm0,(%esp) | |
842 movdqa %xmm1,16(%esp) | |
843 movdqa %xmm2,32(%esp) | |
844 movdqa %xmm3,48(%esp) | |
/* %edx = number of iterations of L012loop1x. */
845 movl $10,%edx | |
846 jmp L012loop1x | |
/*
 * L013outer1x: start of every block after the first on the one-block
 * path. Reload the input state from 0..47(%esp), add {1,0,0,0}
 * (80(%eax)) to the counter row and store the new counter row back at
 * 48(%esp).
 */
847 .align 4,0x90 | |
848 L013outer1x: | |
849 movdqa 80(%eax),%xmm3 | |
850 movdqa (%esp),%xmm0 | |
851 movdqa 16(%esp),%xmm1 | |
852 movdqa 32(%esp),%xmm2 | |
853 paddd 48(%esp),%xmm3 | |
854 movl $10,%edx | |
855 movdqa %xmm3,48(%esp) | |
856 jmp L012loop1x | |
/*
 * L012loop1x: one iteration = two passes of four parallel quarter-rounds
 * over the rows xmm0..xmm3 (xmm4 is scratch). The .byte sequences
 * 102,15,56,0,222 and 102,15,56,0,223 encode "pshufb %xmm6,%xmm3" and
 * "pshufb %xmm7,%xmm3", i.e. the rotations by 16 and 8 bits.
 */
857 .align 4,0x90 | |
858 L012loop1x: | |
859 paddd %xmm1,%xmm0 | |
860 pxor %xmm0,%xmm3 | |
861 .byte 102,15,56,0,222 | |
862 paddd %xmm3,%xmm2 | |
863 pxor %xmm2,%xmm1 | |
864 movdqa %xmm1,%xmm4 | |
865 psrld $20,%xmm1 | |
866 pslld $12,%xmm4 | |
867 por %xmm4,%xmm1 | |
868 paddd %xmm1,%xmm0 | |
869 pxor %xmm0,%xmm3 | |
870 .byte 102,15,56,0,223 | |
871 paddd %xmm3,%xmm2 | |
872 pxor %xmm2,%xmm1 | |
873 movdqa %xmm1,%xmm4 | |
874 psrld $25,%xmm1 | |
875 pslld $7,%xmm4 | |
876 por %xmm4,%xmm1 | |
/*
 * Re-align the rows for the second pass: $78 swaps the two 64-bit
 * halves of xmm2, $57 and $147 rotate the dwords of xmm1 and xmm3 by
 * one position in opposite directions.
 */
877 pshufd $78,%xmm2,%xmm2 | |
878 pshufd $57,%xmm1,%xmm1 | |
879 pshufd $147,%xmm3,%xmm3 | |
880 nop | |
881 paddd %xmm1,%xmm0 | |
882 pxor %xmm0,%xmm3 | |
883 .byte 102,15,56,0,222 | |
884 paddd %xmm3,%xmm2 | |
885 pxor %xmm2,%xmm1 | |
886 movdqa %xmm1,%xmm4 | |
887 psrld $20,%xmm1 | |
888 pslld $12,%xmm4 | |
889 por %xmm4,%xmm1 | |
890 paddd %xmm1,%xmm0 | |
891 pxor %xmm0,%xmm3 | |
892 .byte 102,15,56,0,223 | |
893 paddd %xmm3,%xmm2 | |
894 pxor %xmm2,%xmm1 | |
895 movdqa %xmm1,%xmm4 | |
896 psrld $25,%xmm1 | |
897 pslld $7,%xmm4 | |
898 por %xmm4,%xmm1 | |
/* Undo the re-alignment: same shuffles with the $57/$147 immediates exchanged. */
899 pshufd $78,%xmm2,%xmm2 | |
900 pshufd $147,%xmm1,%xmm1 | |
901 pshufd $57,%xmm3,%xmm3 | |
902 decl %edx | |
903 jnz L012loop1x | |
/* Add the saved input state to get the keystream block; fewer than 64 bytes left goes to the byte-wise tail. */
904 paddd (%esp),%xmm0 | |
905 paddd 16(%esp),%xmm1 | |
906 paddd 32(%esp),%xmm2 | |
907 paddd 48(%esp),%xmm3 | |
908 cmpl $64,%ecx | |
909 jb L014tail | |
/* Full block: XOR 64 input bytes with the keystream, store, advance both pointers, and loop while bytes remain. */
910 movdqu (%esi),%xmm4 | |
911 movdqu 16(%esi),%xmm5 | |
912 pxor %xmm4,%xmm0 | |
913 movdqu 32(%esi),%xmm4 | |
914 pxor %xmm5,%xmm1 | |
915 movdqu 48(%esi),%xmm5 | |
916 pxor %xmm4,%xmm2 | |
917 pxor %xmm5,%xmm3 | |
918 leal 64(%esi),%esi | |
919 movdqu %xmm0,(%edi) | |
920 movdqu %xmm1,16(%edi) | |
921 movdqu %xmm2,32(%edi) | |
922 movdqu %xmm3,48(%edi) | |
923 leal 64(%edi),%edi | |
924 subl $64,%ecx | |
925 jnz L013outer1x | |
926 jmp L011done | |
/*
 * L014tail: final partial block (%ecx = 1..63 bytes). The keystream
 * block is spilled to 0..63(%esp), overwriting the saved input state,
 * which is no longer needed. %eax is cleared here and so stops
 * pointing at the constant table.
 */
927 L014tail: | |
928 movdqa %xmm0,(%esp) | |
929 movdqa %xmm1,16(%esp) | |
930 movdqa %xmm2,32(%esp) | |
931 movdqa %xmm3,48(%esp) | |
932 xorl %eax,%eax | |
933 xorl %edx,%edx | |
934 xorl %ebp,%ebp | |
/*
 * Byte loop: out[i] = keystream[i] XOR in[i], with %ebp as the index
 * (incremented before the store, hence the -1 displacement) and %ecx
 * counting the remaining bytes down to zero.
 */
935 L015tail_loop: | |
936 movb (%esp,%ebp,1),%al | |
937 movb (%esi,%ebp,1),%dl | |
938 leal 1(%ebp),%ebp | |
939 xorb %dl,%al | |
940 movb %al,-1(%edi,%ebp,1) | |
941 decl %ecx | |
942 jnz L015tail_loop | |
/*
 * L011done: common exit of the SSSE3 code. The pre-alignment stack
 * pointer saved at 512(%esp) is restored, which discards the aligned
 * frame, then the four saved registers are popped in reverse push
 * order.
 */
943 L011done: | |
944 movl 512(%esp),%esp | |
945 popl %edi | |
946 popl %esi | |
947 popl %ebx | |
948 popl %ebp | |
949 ret | |
/*
 * Lssse3_data: 64-byte-aligned constant table, addressed relative to
 * %eax by the SSSE3 code. Offsets:
 *     0  pshufb mask that rotates every dword left by 16 bits
 *    16  pshufb mask that rotates every dword left by 8 bits
 *    32  the four constant words (ASCII "expand 32-byte k")
 *    48  {0,1,2,3}     per-lane counter offsets for the four-block path
 *    64  {4,4,4,4}     counter step per four-block batch
 *    80  {1,0,0,0}     counter step per single block
 *    96  {4,0,0,0}     counter step when leaving the four-block path
 *   112  {0,-1,-1,-1}  mask that keeps words 1-3 of the counter row
 */
950 .align 6,0x90 | |
951 Lssse3_data: | |
952 .byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13 | |
953 .byte 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14 | |
954 .long 1634760805,857760878,2036477234,1797285236 | |
955 .long 0,1,2,3 | |
956 .long 4,4,4,4 | |
957 .long 1,0,0,0 | |
958 .long 4,0,0,0 | |
959 .long 0,-1,-1,-1 | |
/* NUL-terminated ASCII identification string: "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>". */
960 .align 6,0x90 | |
961 .byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54 | |
962 .byte 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32 | |
963 .byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111 | |
964 .byte 114,103,62,0 | |
/*
 * Mach-O non-lazy symbol pointer for _OPENSSL_ia32cap_P. The dispatch
 * code in _ChaCha20_ctr32 loads the address of the capability vector
 * from this slot; the .long 0 placeholder is presumably filled in by
 * the dynamic linker at load time.
 */
965 .section __IMPORT,__pointers,non_lazy_symbol_pointers | |
966 L_OPENSSL_ia32cap_P$non_lazy_ptr: | |
967 .indirect_symbol _OPENSSL_ia32cap_P | |
968 .long 0 | |
969 #endif | |
OLD | NEW |