OLD | NEW |
| (Empty) |
; Section selection: pick the code section declaration that matches the object
; format being assembled (obj, win32, or anything else).
1 %ifidn __OUTPUT_FORMAT__,obj | |
2 section code use32 class=code align=64 | |
3 %elifidn __OUTPUT_FORMAT__,win32 | |
; win32 only: when assembled by Yasm, require version 1.1.0 (01010000h) or
; newer; with any other assembler, define $@feat.00 explicitly.
4 %ifdef __YASM_VERSION_ID__ | |
5 %if __YASM_VERSION_ID__ < 01010000h | |
6 %error yasm version 1.1.0 or later needed. | |
7 %endif | |
8 ; Yasm automatically includes @feat.00 and complains about redefining it. | |
9 ; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html | |
10 %else | |
11 $@feat.00 equ 1 | |
12 %endif | |
13 section .text code align=64 | |
14 %else | |
15 section .text code | |
16 %endif | |
;-----------------------------------------------------------------------
; _ChaCha20_ctr32 -- XOR a buffer with a ChaCha20 keystream (integer path).
;
; Arguments are taken from the stack. After the four pushes below they are:
;   [20+esp] arg1  destination pointer (written 64 bytes / 1 byte at a time)
;   [24+esp] arg2  source pointer      (read and XORed with the keystream)
;   [28+esp] arg3  byte count          (function returns at once if zero)
;   [32+esp] arg4  32 bytes copied into state words 4..11
;   [36+esp] arg5  16 bytes copied into state words 12..15; word 12 is
;                  incremented by one for every 64-byte block produced
; Presumably the C prototype is (out, in, len, key[8], counter[4]) --
; TODO confirm against the caller's declaration.
;
; EBP, EBX, ESI and EDI are saved and restored; EAX, ECX, EDX are clobbered.
;
; If bit 24 (16777216) of dword 0 and bit 9 (512) of dword 1 of
; _OPENSSL_ia32cap_P are both set, control is handed to L$ssse3_shortcut
; with EAX = address of L$pic_point (presumably these are the FXSR and
; SSSE3 feature bits -- confirm against the capability-vector layout).
;
; Local frame on the integer path (132 bytes below the saved registers):
;   [  0.. 60+esp]  working state words 0..15
;   [ 80..108+esp]  copy of arg4 (input state words 4..11)
;   [112..124+esp]  copy of arg5 (input state words 12..15)
;   [128+esp]       saved double-round counter
;   [152/156/160+esp] alias arg1/arg2/arg3 and are updated in place per block
;-----------------------------------------------------------------------
17 global _ChaCha20_ctr32 | |
18 align 16 | |
19 _ChaCha20_ctr32: | |
20 L$_ChaCha20_ctr32_begin: | |
21 push ebp | |
22 push ebx | |
23 push esi | |
24 push edi | |
; Nothing to do when the byte count (arg3) is zero.
25 xor eax,eax | |
26 cmp eax,DWORD [28+esp] | |
27 je NEAR L$000no_data | |
; call/pop leaves the run-time address of L$pic_point in EAX; the SSSE3 code
; uses it to locate L$ssse3_data.
28 call L$pic_point | |
29 L$pic_point: | |
30 pop eax | |
; Capability check: take the SSSE3 path only if both tested bits are set.
31 lea ebp,[_OPENSSL_ia32cap_P] | |
32 test DWORD [ebp],16777216 | |
33 jz NEAR L$001x86 | |
34 test DWORD [4+ebp],512 | |
35 jz NEAR L$001x86 | |
36 jmp NEAR L$ssse3_shortcut | |
; ---- integer-only path ----
37 L$001x86: | |
38 mov esi,DWORD [32+esp] | |
39 mov edi,DWORD [36+esp] | |
40 sub esp,132 | |
; Copy the 32 bytes at arg4 into the frame as input state words 4..11.
41 mov eax,DWORD [esi] | |
42 mov ebx,DWORD [4+esi] | |
43 mov ecx,DWORD [8+esi] | |
44 mov edx,DWORD [12+esi] | |
45 mov DWORD [80+esp],eax | |
46 mov DWORD [84+esp],ebx | |
47 mov DWORD [88+esp],ecx | |
48 mov DWORD [92+esp],edx | |
49 mov eax,DWORD [16+esi] | |
50 mov ebx,DWORD [20+esi] | |
51 mov ecx,DWORD [24+esi] | |
52 mov edx,DWORD [28+esi] | |
53 mov DWORD [96+esp],eax | |
54 mov DWORD [100+esp],ebx | |
55 mov DWORD [104+esp],ecx | |
56 mov DWORD [108+esp],edx | |
; Copy the 16 bytes at arg5 as input state words 12..15. Word 12 is stored
; minus one because L$002entry adds one before every block.
57 mov eax,DWORD [edi] | |
58 mov ebx,DWORD [4+edi] | |
59 mov ecx,DWORD [8+edi] | |
60 mov edx,DWORD [12+edi] | |
61 sub eax,1 | |
62 mov DWORD [112+esp],eax | |
63 mov DWORD [116+esp],ebx | |
64 mov DWORD [120+esp],ecx | |
65 mov DWORD [124+esp],edx | |
66 jmp NEAR L$002entry | |
67 align 16 | |
; Re-entry for each further block: write the advanced source pointer (EBX),
; destination pointer (EAX) and remaining length (ECX) back to the argument
; slots.
68 L$003outer_loop: | |
69 mov DWORD [156+esp],ebx | |
70 mov DWORD [152+esp],eax | |
71 mov DWORD [160+esp],ecx | |
; Build the working state for one block. Words 0..3 are the four immediates
; below (the same dwords as the third row of L$ssse3_data). Words kept in
; registers across the loop entry: EAX=w0, EBP=w4, ECX=w8, ESI=w9, EDX=w12.
72 L$002entry: | |
73 mov eax,1634760805 | |
74 mov DWORD [4+esp],857760878 | |
75 mov DWORD [8+esp],2036477234 | |
76 mov DWORD [12+esp],1797285236 | |
77 mov ebx,DWORD [84+esp] | |
78 mov ebp,DWORD [88+esp] | |
79 mov ecx,DWORD [104+esp] | |
80 mov esi,DWORD [108+esp] | |
81 mov edx,DWORD [116+esp] | |
82 mov edi,DWORD [120+esp] | |
83 mov DWORD [20+esp],ebx | |
84 mov DWORD [24+esp],ebp | |
85 mov DWORD [40+esp],ecx | |
86 mov DWORD [44+esp],esi | |
87 mov DWORD [52+esp],edx | |
88 mov DWORD [56+esp],edi | |
89 mov ebx,DWORD [92+esp] | |
90 mov edi,DWORD [124+esp] | |
91 mov edx,DWORD [112+esp] | |
92 mov ebp,DWORD [80+esp] | |
93 mov ecx,DWORD [96+esp] | |
94 mov esi,DWORD [100+esp] | |
; Advance the block counter (word 12) and store it back for the next block.
95 add edx,1 | |
96 mov DWORD [28+esp],ebx | |
97 mov DWORD [60+esp],edi | |
98 mov DWORD [112+esp],edx | |
; EBX = 10 loop iterations; each iteration performs eight quarter-rounds.
99 mov ebx,10 | |
100 jmp NEAR L$004loop | |
101 align 16 | |
; One iteration = quarter-rounds on words (0,4,8,12) (1,5,9,13) (2,6,10,14)
; (3,7,11,15) followed by (0,5,10,15) (1,6,11,12) (2,7,8,13) (3,4,9,14).
; Each quarter-round is add/xor/rol with rotate counts 16, 12, 8, 7. The
; loads and stores for the neighbouring quarter-rounds are interleaved, so
; instruction order must not be changed. The iteration counter is parked at
; [128+esp] because EBX is needed as a state register.
102 L$004loop: | |
103 add eax,ebp | |
104 mov DWORD [128+esp],ebx | |
105 mov ebx,ebp | |
106 xor edx,eax | |
107 rol edx,16 | |
108 add ecx,edx | |
109 xor ebx,ecx | |
110 mov edi,DWORD [52+esp] | |
111 rol ebx,12 | |
112 mov ebp,DWORD [20+esp] | |
113 add eax,ebx | |
114 xor edx,eax | |
115 mov DWORD [esp],eax | |
116 rol edx,8 | |
117 mov eax,DWORD [4+esp] | |
118 add ecx,edx | |
119 mov DWORD [48+esp],edx | |
120 xor ebx,ecx | |
121 add eax,ebp | |
122 rol ebx,7 | |
123 xor edi,eax | |
124 mov DWORD [32+esp],ecx | |
125 rol edi,16 | |
126 mov DWORD [16+esp],ebx | |
127 add esi,edi | |
128 mov ecx,DWORD [40+esp] | |
129 xor ebp,esi | |
130 mov edx,DWORD [56+esp] | |
131 rol ebp,12 | |
132 mov ebx,DWORD [24+esp] | |
133 add eax,ebp | |
134 xor edi,eax | |
135 mov DWORD [4+esp],eax | |
136 rol edi,8 | |
137 mov eax,DWORD [8+esp] | |
138 add esi,edi | |
139 mov DWORD [52+esp],edi | |
140 xor ebp,esi | |
141 add eax,ebx | |
142 rol ebp,7 | |
143 xor edx,eax | |
144 mov DWORD [36+esp],esi | |
145 rol edx,16 | |
146 mov DWORD [20+esp],ebp | |
147 add ecx,edx | |
148 mov esi,DWORD [44+esp] | |
149 xor ebx,ecx | |
150 mov edi,DWORD [60+esp] | |
151 rol ebx,12 | |
152 mov ebp,DWORD [28+esp] | |
153 add eax,ebx | |
154 xor edx,eax | |
155 mov DWORD [8+esp],eax | |
156 rol edx,8 | |
157 mov eax,DWORD [12+esp] | |
158 add ecx,edx | |
159 mov DWORD [56+esp],edx | |
160 xor ebx,ecx | |
161 add eax,ebp | |
162 rol ebx,7 | |
163 xor edi,eax | |
164 rol edi,16 | |
165 mov DWORD [24+esp],ebx | |
166 add esi,edi | |
167 xor ebp,esi | |
168 rol ebp,12 | |
169 mov ebx,DWORD [20+esp] | |
170 add eax,ebp | |
171 xor edi,eax | |
172 mov DWORD [12+esp],eax | |
173 rol edi,8 | |
174 mov eax,DWORD [esp] | |
175 add esi,edi | |
176 mov edx,edi | |
177 xor ebp,esi | |
178 add eax,ebx | |
179 rol ebp,7 | |
180 xor edx,eax | |
181 rol edx,16 | |
182 mov DWORD [28+esp],ebp | |
183 add ecx,edx | |
184 xor ebx,ecx | |
185 mov edi,DWORD [48+esp] | |
186 rol ebx,12 | |
187 mov ebp,DWORD [24+esp] | |
188 add eax,ebx | |
189 xor edx,eax | |
190 mov DWORD [esp],eax | |
191 rol edx,8 | |
192 mov eax,DWORD [4+esp] | |
193 add ecx,edx | |
194 mov DWORD [60+esp],edx | |
195 xor ebx,ecx | |
196 add eax,ebp | |
197 rol ebx,7 | |
198 xor edi,eax | |
199 mov DWORD [40+esp],ecx | |
200 rol edi,16 | |
201 mov DWORD [20+esp],ebx | |
202 add esi,edi | |
203 mov ecx,DWORD [32+esp] | |
204 xor ebp,esi | |
205 mov edx,DWORD [52+esp] | |
206 rol ebp,12 | |
207 mov ebx,DWORD [28+esp] | |
208 add eax,ebp | |
209 xor edi,eax | |
210 mov DWORD [4+esp],eax | |
211 rol edi,8 | |
212 mov eax,DWORD [8+esp] | |
213 add esi,edi | |
214 mov DWORD [48+esp],edi | |
215 xor ebp,esi | |
216 add eax,ebx | |
217 rol ebp,7 | |
218 xor edx,eax | |
219 mov DWORD [44+esp],esi | |
220 rol edx,16 | |
221 mov DWORD [24+esp],ebp | |
222 add ecx,edx | |
223 mov esi,DWORD [36+esp] | |
224 xor ebx,ecx | |
225 mov edi,DWORD [56+esp] | |
226 rol ebx,12 | |
227 mov ebp,DWORD [16+esp] | |
228 add eax,ebx | |
229 xor edx,eax | |
230 mov DWORD [8+esp],eax | |
231 rol edx,8 | |
232 mov eax,DWORD [12+esp] | |
233 add ecx,edx | |
234 mov DWORD [52+esp],edx | |
235 xor ebx,ecx | |
236 add eax,ebp | |
237 rol ebx,7 | |
238 xor edi,eax | |
239 rol edi,16 | |
240 mov DWORD [28+esp],ebx | |
241 add esi,edi | |
242 xor ebp,esi | |
243 mov edx,DWORD [48+esp] | |
244 rol ebp,12 | |
245 mov ebx,DWORD [128+esp] | |
246 add eax,ebp | |
247 xor edi,eax | |
248 mov DWORD [12+esp],eax | |
249 rol edi,8 | |
250 mov eax,DWORD [esp] | |
251 add esi,edi | |
252 mov DWORD [56+esp],edi | |
253 xor ebp,esi | |
254 rol ebp,7 | |
255 dec ebx | |
256 jnz NEAR L$004loop | |
; Rounds finished. Registers hold w0 (EAX), w4 (EBP), w8 (ECX), w9 (ESI),
; w12 (EDX) and w14 (EDI); the remaining words are in [0..60+esp]. Add the
; input state to form the keystream block, then branch on the remaining
; length (EBX): fewer than 64 bytes goes to the byte-wise tail.
257 mov ebx,DWORD [160+esp] | |
258 add eax,1634760805 | |
259 add ebp,DWORD [80+esp] | |
260 add ecx,DWORD [96+esp] | |
261 add esi,DWORD [100+esp] | |
262 cmp ebx,64 | |
263 jb NEAR L$005tail | |
; Full 64-byte block: EBX = source pointer, EAX = destination pointer.
; XOR each keystream word with the source and store it to the destination.
; Word 0 is parked at [esp] while EAX serves as the destination pointer and
; is written last (via EBP).
264 mov ebx,DWORD [156+esp] | |
265 add edx,DWORD [112+esp] | |
266 add edi,DWORD [120+esp] | |
267 xor eax,DWORD [ebx] | |
268 xor ebp,DWORD [16+ebx] | |
269 mov DWORD [esp],eax | |
270 mov eax,DWORD [152+esp] | |
271 xor ecx,DWORD [32+ebx] | |
272 xor esi,DWORD [36+ebx] | |
273 xor edx,DWORD [48+ebx] | |
274 xor edi,DWORD [56+ebx] | |
275 mov DWORD [16+eax],ebp | |
276 mov DWORD [32+eax],ecx | |
277 mov DWORD [36+eax],esi | |
278 mov DWORD [48+eax],edx | |
279 mov DWORD [56+eax],edi | |
280 mov ebp,DWORD [4+esp] | |
281 mov ecx,DWORD [8+esp] | |
282 mov esi,DWORD [12+esp] | |
283 mov edx,DWORD [20+esp] | |
284 mov edi,DWORD [24+esp] | |
285 add ebp,857760878 | |
286 add ecx,2036477234 | |
287 add esi,1797285236 | |
288 add edx,DWORD [84+esp] | |
289 add edi,DWORD [88+esp] | |
290 xor ebp,DWORD [4+ebx] | |
291 xor ecx,DWORD [8+ebx] | |
292 xor esi,DWORD [12+ebx] | |
293 xor edx,DWORD [20+ebx] | |
294 xor edi,DWORD [24+ebx] | |
295 mov DWORD [4+eax],ebp | |
296 mov DWORD [8+eax],ecx | |
297 mov DWORD [12+eax],esi | |
298 mov DWORD [20+eax],edx | |
299 mov DWORD [24+eax],edi | |
300 mov ebp,DWORD [28+esp] | |
301 mov ecx,DWORD [40+esp] | |
302 mov esi,DWORD [44+esp] | |
303 mov edx,DWORD [52+esp] | |
304 mov edi,DWORD [60+esp] | |
305 add ebp,DWORD [92+esp] | |
306 add ecx,DWORD [104+esp] | |
307 add esi,DWORD [108+esp] | |
308 add edx,DWORD [116+esp] | |
309 add edi,DWORD [124+esp] | |
310 xor ebp,DWORD [28+ebx] | |
311 xor ecx,DWORD [40+ebx] | |
312 xor esi,DWORD [44+ebx] | |
313 xor edx,DWORD [52+ebx] | |
314 xor edi,DWORD [60+ebx] | |
; Advance source and destination by 64 and the remaining length by -64;
; loop while any bytes remain.
315 lea ebx,[64+ebx] | |
316 mov DWORD [28+eax],ebp | |
317 mov ebp,DWORD [esp] | |
318 mov DWORD [40+eax],ecx | |
319 mov ecx,DWORD [160+esp] | |
320 mov DWORD [44+eax],esi | |
321 mov DWORD [52+eax],edx | |
322 mov DWORD [60+eax],edi | |
323 mov DWORD [eax],ebp | |
324 lea eax,[64+eax] | |
325 sub ecx,64 | |
326 jnz NEAR L$003outer_loop | |
327 jmp NEAR L$006done | |
; Partial final block (1..63 bytes, count in EBX): materialise the whole
; keystream block in [0..60+esp], then XOR it into the output byte by byte.
328 L$005tail: | |
329 add edx,DWORD [112+esp] | |
330 add edi,DWORD [120+esp] | |
331 mov DWORD [esp],eax | |
332 mov DWORD [16+esp],ebp | |
333 mov DWORD [32+esp],ecx | |
334 mov DWORD [36+esp],esi | |
335 mov DWORD [48+esp],edx | |
336 mov DWORD [56+esp],edi | |
337 mov ebp,DWORD [4+esp] | |
338 mov ecx,DWORD [8+esp] | |
339 mov esi,DWORD [12+esp] | |
340 mov edx,DWORD [20+esp] | |
341 mov edi,DWORD [24+esp] | |
342 add ebp,857760878 | |
343 add ecx,2036477234 | |
344 add esi,1797285236 | |
345 add edx,DWORD [84+esp] | |
346 add edi,DWORD [88+esp] | |
347 mov DWORD [4+esp],ebp | |
348 mov DWORD [8+esp],ecx | |
349 mov DWORD [12+esp],esi | |
350 mov DWORD [20+esp],edx | |
351 mov DWORD [24+esp],edi | |
352 mov ebp,DWORD [28+esp] | |
353 mov ecx,DWORD [40+esp] | |
354 mov esi,DWORD [44+esp] | |
355 mov edx,DWORD [52+esp] | |
356 mov edi,DWORD [60+esp] | |
357 add ebp,DWORD [92+esp] | |
358 add ecx,DWORD [104+esp] | |
359 add esi,DWORD [108+esp] | |
360 add edx,DWORD [116+esp] | |
361 add edi,DWORD [124+esp] | |
362 mov DWORD [28+esp],ebp | |
363 mov ebp,DWORD [156+esp] | |
364 mov DWORD [40+esp],ecx | |
365 mov ecx,DWORD [152+esp] | |
366 mov DWORD [44+esp],esi | |
367 xor esi,esi | |
368 mov DWORD [52+esp],edx | |
369 mov DWORD [60+esp],edi | |
370 xor eax,eax | |
371 xor edx,edx | |
; Byte loop: EBP = source, ECX = destination, ESI = byte index, EBX = bytes
; left. ESI is incremented before the store, hence the -1 displacement.
372 L$007tail_loop: | |
373 mov al,BYTE [ebp*1+esi] | |
374 mov dl,BYTE [esi*1+esp] | |
375 lea esi,[1+esi] | |
376 xor al,dl | |
377 mov BYTE [esi*1+ecx-1],al | |
378 dec ebx | |
379 jnz NEAR L$007tail_loop | |
; Release the local frame, restore the saved registers and return.
380 L$006done: | |
381 add esp,132 | |
382 L$000no_data: | |
383 pop edi | |
384 pop esi | |
385 pop ebx | |
386 pop ebp | |
387 ret | |
;-----------------------------------------------------------------------
; _ChaCha20_ssse3 -- XOR a buffer with a ChaCha20 keystream (SSSE3 path).
;
; Stack arguments are the same five as _ChaCha20_ctr32 (read at
; [20..36+esp] after four pushes): destination, source, byte count,
; 32-byte block for state words 4..11, 16-byte block for words 12..15.
;
; Normal entry is the jump from _ChaCha20_ctr32 to L$ssse3_shortcut, which
; arrives with the four registers already pushed and EAX holding the address
; of L$pic_point.
; NOTE(review): the public entry _ChaCha20_ssse3 itself does not load EAX
; before it is used to locate L$ssse3_data, and does not test for a zero
; byte count (with ECX = 0 the tail loop's dec/jnz would wrap). It looks
; like this symbol is only safe to reach through _ChaCha20_ctr32 -- confirm
; that nothing calls it directly.
;
; Saves/restores EBP, EBX, ESI, EDI; uses EAX, ECX, EDX and XMM0..XMM7.
;
; Frame (ESP aligned down to 64 bytes, 524 bytes reserved before aligning):
;   [  0.. 63+esp]  1x path: input state / keystream block
;   [  0..255+esp]  4x path: working words 0..15, one 16-byte row each,
;                   addressed as [EBX-128+16*i] with EBX = ESP+128
;   [256..511+esp]  4x path: input words 0..15 broadcast to four lanes,
;                   addressed as [EBP-128+16*i] with EBP = ESP+384
;   [512+esp]       caller's ESP
;   [516+esp]       saved arg4 pointer,  [520+esp] saved arg5 pointer
;-----------------------------------------------------------------------
388 global _ChaCha20_ssse3 | |
389 align 16 | |
390 _ChaCha20_ssse3: | |
391 L$_ChaCha20_ssse3_begin: | |
392 push ebp | |
393 push ebx | |
394 push esi | |
395 push edi | |
396 L$ssse3_shortcut: | |
; EDI = destination, ESI = source, ECX = byte count, EDX = arg4, EBX = arg5.
397 mov edi,DWORD [20+esp] | |
398 mov esi,DWORD [24+esp] | |
399 mov ecx,DWORD [28+esp] | |
400 mov edx,DWORD [32+esp] | |
401 mov ebx,DWORD [36+esp] | |
; Build a 64-byte-aligned frame and remember the original ESP inside it.
402 mov ebp,esp | |
403 sub esp,524 | |
404 and esp,-64 | |
405 mov DWORD [512+esp],ebp | |
; EAX = address of L$ssse3_data (relies on EAX = L$pic_point on entry).
406 lea eax,[(L$ssse3_data-L$pic_point)+eax] | |
; XMM3 = the 16 bytes at arg5 (state words 12..15).
407 movdqu xmm3,[ebx] | |
; Fewer than 256 bytes: go straight to the one-block-at-a-time code.
408 cmp ecx,256 | |
409 jb NEAR L$0081x | |
; ---- 4x path: four 64-byte blocks are computed in parallel, one per lane ----
410 mov DWORD [516+esp],edx | |
411 mov DWORD [520+esp],ebx | |
412 sub ecx,256 | |
413 lea ebp,[384+esp] | |
414 movdqu xmm7,[edx] | |
; Broadcast words 12..15 across lanes. Word 12 gets per-lane offsets 0,1,2,3
; ([48+eax]) and is pre-decremented by 4 ([64+eax]) because the outer loop
; adds 4 before each batch.
415 pshufd xmm0,xmm3,0 | |
416 pshufd xmm1,xmm3,85 | |
417 pshufd xmm2,xmm3,170 | |
418 pshufd xmm3,xmm3,255 | |
419 paddd xmm0,[48+eax] | |
420 pshufd xmm4,xmm7,0 | |
421 pshufd xmm5,xmm7,85 | |
422 psubd xmm0,[64+eax] | |
423 pshufd xmm6,xmm7,170 | |
424 pshufd xmm7,xmm7,255 | |
425 movdqa [64+ebp],xmm0 | |
426 movdqa [80+ebp],xmm1 | |
427 movdqa [96+ebp],xmm2 | |
428 movdqa [112+ebp],xmm3 | |
; Words 4..7 (first half of arg4) were broadcast above; store them, then
; broadcast words 8..11 (second half of arg4) and words 0..3 ([32+eax]).
429 movdqu xmm3,[16+edx] | |
430 movdqa [ebp-64],xmm4 | |
431 movdqa [ebp-48],xmm5 | |
432 movdqa [ebp-32],xmm6 | |
433 movdqa [ebp-16],xmm7 | |
434 movdqa xmm7,[32+eax] | |
435 lea ebx,[128+esp] | |
436 pshufd xmm0,xmm3,0 | |
437 pshufd xmm1,xmm3,85 | |
438 pshufd xmm2,xmm3,170 | |
439 pshufd xmm3,xmm3,255 | |
440 pshufd xmm4,xmm7,0 | |
441 pshufd xmm5,xmm7,85 | |
442 pshufd xmm6,xmm7,170 | |
443 pshufd xmm7,xmm7,255 | |
444 movdqa [ebp],xmm0 | |
445 movdqa [16+ebp],xmm1 | |
446 movdqa [32+ebp],xmm2 | |
447 movdqa [48+ebp],xmm3 | |
448 movdqa [ebp-128],xmm4 | |
449 movdqa [ebp-112],xmm5 | |
450 movdqa [ebp-96],xmm6 | |
451 movdqa [ebp-80],xmm7 | |
; Bias both data pointers by +128 so the four blocks of a batch are reached
; with displacements -128, -64, 0 and +64.
452 lea esi,[128+esi] | |
453 lea edi,[128+edi] | |
454 jmp NEAR L$009outer_loop | |
455 align 16 | |
; Per 256-byte batch: copy the input rows into the working area (EBX frame),
; add 4 to every lane of the counter row and store it back, and keep rows
; 0, 4, 8, 9 and 12 in XMM0, XMM3, XMM4, XMM5 and XMM6 for the round loop.
456 L$009outer_loop: | |
457 movdqa xmm1,[ebp-112] | |
458 movdqa xmm2,[ebp-96] | |
459 movdqa xmm3,[ebp-80] | |
460 movdqa xmm5,[ebp-48] | |
461 movdqa xmm6,[ebp-32] | |
462 movdqa xmm7,[ebp-16] | |
463 movdqa [ebx-112],xmm1 | |
464 movdqa [ebx-96],xmm2 | |
465 movdqa [ebx-80],xmm3 | |
466 movdqa [ebx-48],xmm5 | |
467 movdqa [ebx-32],xmm6 | |
468 movdqa [ebx-16],xmm7 | |
469 movdqa xmm2,[32+ebp] | |
470 movdqa xmm3,[48+ebp] | |
471 movdqa xmm4,[64+ebp] | |
472 movdqa xmm5,[80+ebp] | |
473 movdqa xmm6,[96+ebp] | |
474 movdqa xmm7,[112+ebp] | |
475 paddd xmm4,[64+eax] | |
476 movdqa [32+ebx],xmm2 | |
477 movdqa [48+ebx],xmm3 | |
478 movdqa [64+ebx],xmm4 | |
479 movdqa [80+ebx],xmm5 | |
480 movdqa [96+ebx],xmm6 | |
481 movdqa [112+ebx],xmm7 | |
482 movdqa [64+ebp],xmm4 | |
483 movdqa xmm0,[ebp-128] | |
484 movdqa xmm6,xmm4 | |
485 movdqa xmm3,[ebp-64] | |
486 movdqa xmm4,[ebp] | |
487 movdqa xmm5,[16+ebp] | |
488 mov edx,10 | |
489 nop | |
490 align 16 | |
; Round loop, EDX = 10 iterations of eight quarter-rounds on whole rows.
; Rotations by 16 and 8 use pshufb with the masks at [eax] and [16+eax];
; rotations by 12 and 7 use a pslld/psrld/por triple. Loads and stores of
; neighbouring quarter-rounds are interleaved -- do not reorder.
491 L$010loop: | |
492 paddd xmm0,xmm3 | |
493 movdqa xmm2,xmm3 | |
494 pxor xmm6,xmm0 | |
495 pshufb xmm6,[eax] | |
496 paddd xmm4,xmm6 | |
497 pxor xmm2,xmm4 | |
498 movdqa xmm3,[ebx-48] | |
499 movdqa xmm1,xmm2 | |
500 pslld xmm2,12 | |
501 psrld xmm1,20 | |
502 por xmm2,xmm1 | |
503 movdqa xmm1,[ebx-112] | |
504 paddd xmm0,xmm2 | |
505 movdqa xmm7,[80+ebx] | |
506 pxor xmm6,xmm0 | |
507 movdqa [ebx-128],xmm0 | |
508 pshufb xmm6,[16+eax] | |
509 paddd xmm4,xmm6 | |
510 movdqa [64+ebx],xmm6 | |
511 pxor xmm2,xmm4 | |
512 paddd xmm1,xmm3 | |
513 movdqa xmm0,xmm2 | |
514 pslld xmm2,7 | |
515 psrld xmm0,25 | |
516 pxor xmm7,xmm1 | |
517 por xmm2,xmm0 | |
518 movdqa [ebx],xmm4 | |
519 pshufb xmm7,[eax] | |
520 movdqa [ebx-64],xmm2 | |
521 paddd xmm5,xmm7 | |
522 movdqa xmm4,[32+ebx] | |
523 pxor xmm3,xmm5 | |
524 movdqa xmm2,[ebx-32] | |
525 movdqa xmm0,xmm3 | |
526 pslld xmm3,12 | |
527 psrld xmm0,20 | |
528 por xmm3,xmm0 | |
529 movdqa xmm0,[ebx-96] | |
530 paddd xmm1,xmm3 | |
531 movdqa xmm6,[96+ebx] | |
532 pxor xmm7,xmm1 | |
533 movdqa [ebx-112],xmm1 | |
534 pshufb xmm7,[16+eax] | |
535 paddd xmm5,xmm7 | |
536 movdqa [80+ebx],xmm7 | |
537 pxor xmm3,xmm5 | |
538 paddd xmm0,xmm2 | |
539 movdqa xmm1,xmm3 | |
540 pslld xmm3,7 | |
541 psrld xmm1,25 | |
542 pxor xmm6,xmm0 | |
543 por xmm3,xmm1 | |
544 movdqa [16+ebx],xmm5 | |
545 pshufb xmm6,[eax] | |
546 movdqa [ebx-48],xmm3 | |
547 paddd xmm4,xmm6 | |
548 movdqa xmm5,[48+ebx] | |
549 pxor xmm2,xmm4 | |
550 movdqa xmm3,[ebx-16] | |
551 movdqa xmm1,xmm2 | |
552 pslld xmm2,12 | |
553 psrld xmm1,20 | |
554 por xmm2,xmm1 | |
555 movdqa xmm1,[ebx-80] | |
556 paddd xmm0,xmm2 | |
557 movdqa xmm7,[112+ebx] | |
558 pxor xmm6,xmm0 | |
559 movdqa [ebx-96],xmm0 | |
560 pshufb xmm6,[16+eax] | |
561 paddd xmm4,xmm6 | |
562 movdqa [96+ebx],xmm6 | |
563 pxor xmm2,xmm4 | |
564 paddd xmm1,xmm3 | |
565 movdqa xmm0,xmm2 | |
566 pslld xmm2,7 | |
567 psrld xmm0,25 | |
568 pxor xmm7,xmm1 | |
569 por xmm2,xmm0 | |
570 pshufb xmm7,[eax] | |
571 movdqa [ebx-32],xmm2 | |
572 paddd xmm5,xmm7 | |
573 pxor xmm3,xmm5 | |
574 movdqa xmm2,[ebx-48] | |
575 movdqa xmm0,xmm3 | |
576 pslld xmm3,12 | |
577 psrld xmm0,20 | |
578 por xmm3,xmm0 | |
579 movdqa xmm0,[ebx-128] | |
580 paddd xmm1,xmm3 | |
581 pxor xmm7,xmm1 | |
582 movdqa [ebx-80],xmm1 | |
583 pshufb xmm7,[16+eax] | |
584 paddd xmm5,xmm7 | |
585 movdqa xmm6,xmm7 | |
586 pxor xmm3,xmm5 | |
587 paddd xmm0,xmm2 | |
588 movdqa xmm1,xmm3 | |
589 pslld xmm3,7 | |
590 psrld xmm1,25 | |
591 pxor xmm6,xmm0 | |
592 por xmm3,xmm1 | |
593 pshufb xmm6,[eax] | |
594 movdqa [ebx-16],xmm3 | |
595 paddd xmm4,xmm6 | |
596 pxor xmm2,xmm4 | |
597 movdqa xmm3,[ebx-32] | |
598 movdqa xmm1,xmm2 | |
599 pslld xmm2,12 | |
600 psrld xmm1,20 | |
601 por xmm2,xmm1 | |
602 movdqa xmm1,[ebx-112] | |
603 paddd xmm0,xmm2 | |
604 movdqa xmm7,[64+ebx] | |
605 pxor xmm6,xmm0 | |
606 movdqa [ebx-128],xmm0 | |
607 pshufb xmm6,[16+eax] | |
608 paddd xmm4,xmm6 | |
609 movdqa [112+ebx],xmm6 | |
610 pxor xmm2,xmm4 | |
611 paddd xmm1,xmm3 | |
612 movdqa xmm0,xmm2 | |
613 pslld xmm2,7 | |
614 psrld xmm0,25 | |
615 pxor xmm7,xmm1 | |
616 por xmm2,xmm0 | |
617 movdqa [32+ebx],xmm4 | |
618 pshufb xmm7,[eax] | |
619 movdqa [ebx-48],xmm2 | |
620 paddd xmm5,xmm7 | |
621 movdqa xmm4,[ebx] | |
622 pxor xmm3,xmm5 | |
623 movdqa xmm2,[ebx-16] | |
624 movdqa xmm0,xmm3 | |
625 pslld xmm3,12 | |
626 psrld xmm0,20 | |
627 por xmm3,xmm0 | |
628 movdqa xmm0,[ebx-96] | |
629 paddd xmm1,xmm3 | |
630 movdqa xmm6,[80+ebx] | |
631 pxor xmm7,xmm1 | |
632 movdqa [ebx-112],xmm1 | |
633 pshufb xmm7,[16+eax] | |
634 paddd xmm5,xmm7 | |
635 movdqa [64+ebx],xmm7 | |
636 pxor xmm3,xmm5 | |
637 paddd xmm0,xmm2 | |
638 movdqa xmm1,xmm3 | |
639 pslld xmm3,7 | |
640 psrld xmm1,25 | |
641 pxor xmm6,xmm0 | |
642 por xmm3,xmm1 | |
643 movdqa [48+ebx],xmm5 | |
644 pshufb xmm6,[eax] | |
645 movdqa [ebx-32],xmm3 | |
646 paddd xmm4,xmm6 | |
647 movdqa xmm5,[16+ebx] | |
648 pxor xmm2,xmm4 | |
649 movdqa xmm3,[ebx-64] | |
650 movdqa xmm1,xmm2 | |
651 pslld xmm2,12 | |
652 psrld xmm1,20 | |
653 por xmm2,xmm1 | |
654 movdqa xmm1,[ebx-80] | |
655 paddd xmm0,xmm2 | |
656 movdqa xmm7,[96+ebx] | |
657 pxor xmm6,xmm0 | |
658 movdqa [ebx-96],xmm0 | |
659 pshufb xmm6,[16+eax] | |
660 paddd xmm4,xmm6 | |
661 movdqa [80+ebx],xmm6 | |
662 pxor xmm2,xmm4 | |
663 paddd xmm1,xmm3 | |
664 movdqa xmm0,xmm2 | |
665 pslld xmm2,7 | |
666 psrld xmm0,25 | |
667 pxor xmm7,xmm1 | |
668 por xmm2,xmm0 | |
669 pshufb xmm7,[eax] | |
670 movdqa [ebx-16],xmm2 | |
671 paddd xmm5,xmm7 | |
672 pxor xmm3,xmm5 | |
673 movdqa xmm0,xmm3 | |
674 pslld xmm3,12 | |
675 psrld xmm0,20 | |
676 por xmm3,xmm0 | |
677 movdqa xmm0,[ebx-128] | |
678 paddd xmm1,xmm3 | |
679 movdqa xmm6,[64+ebx] | |
680 pxor xmm7,xmm1 | |
681 movdqa [ebx-80],xmm1 | |
682 pshufb xmm7,[16+eax] | |
683 paddd xmm5,xmm7 | |
684 movdqa [96+ebx],xmm7 | |
685 pxor xmm3,xmm5 | |
686 movdqa xmm1,xmm3 | |
687 pslld xmm3,7 | |
688 psrld xmm1,25 | |
689 por xmm3,xmm1 | |
690 dec edx | |
691 jnz NEAR L$010loop | |
; Rounds done: spill the rows still held in registers back to the work area.
692 movdqa [ebx-64],xmm3 | |
693 movdqa [ebx],xmm4 | |
694 movdqa [16+ebx],xmm5 | |
695 movdqa [64+ebx],xmm6 | |
696 movdqa [96+ebx],xmm7 | |
; Output stage, repeated for rows 0..3, 4..7, 8..11 and 12..15: add the input
; rows, transpose the 4x4 dwords with punpck* so each register holds 16
; consecutive keystream bytes of one block, XOR with the source at
; displacements -128/-64/0/+64 (blocks 0..3) and store to the destination.
697 movdqa xmm1,[ebx-112] | |
698 movdqa xmm2,[ebx-96] | |
699 movdqa xmm3,[ebx-80] | |
700 paddd xmm0,[ebp-128] | |
701 paddd xmm1,[ebp-112] | |
702 paddd xmm2,[ebp-96] | |
703 paddd xmm3,[ebp-80] | |
704 movdqa xmm6,xmm0 | |
705 punpckldq xmm0,xmm1 | |
706 movdqa xmm7,xmm2 | |
707 punpckldq xmm2,xmm3 | |
708 punpckhdq xmm6,xmm1 | |
709 punpckhdq xmm7,xmm3 | |
710 movdqa xmm1,xmm0 | |
711 punpcklqdq xmm0,xmm2 | |
712 movdqa xmm3,xmm6 | |
713 punpcklqdq xmm6,xmm7 | |
714 punpckhqdq xmm1,xmm2 | |
715 punpckhqdq xmm3,xmm7 | |
716 movdqu xmm4,[esi-128] | |
717 movdqu xmm5,[esi-64] | |
718 movdqu xmm2,[esi] | |
719 movdqu xmm7,[64+esi] | |
720 lea esi,[16+esi] | |
721 pxor xmm4,xmm0 | |
722 movdqa xmm0,[ebx-64] | |
723 pxor xmm5,xmm1 | |
724 movdqa xmm1,[ebx-48] | |
725 pxor xmm6,xmm2 | |
726 movdqa xmm2,[ebx-32] | |
727 pxor xmm7,xmm3 | |
728 movdqa xmm3,[ebx-16] | |
729 movdqu [edi-128],xmm4 | |
730 movdqu [edi-64],xmm5 | |
731 movdqu [edi],xmm6 | |
732 movdqu [64+edi],xmm7 | |
733 lea edi,[16+edi] | |
; Rows 4..7.
734 paddd xmm0,[ebp-64] | |
735 paddd xmm1,[ebp-48] | |
736 paddd xmm2,[ebp-32] | |
737 paddd xmm3,[ebp-16] | |
738 movdqa xmm6,xmm0 | |
739 punpckldq xmm0,xmm1 | |
740 movdqa xmm7,xmm2 | |
741 punpckldq xmm2,xmm3 | |
742 punpckhdq xmm6,xmm1 | |
743 punpckhdq xmm7,xmm3 | |
744 movdqa xmm1,xmm0 | |
745 punpcklqdq xmm0,xmm2 | |
746 movdqa xmm3,xmm6 | |
747 punpcklqdq xmm6,xmm7 | |
748 punpckhqdq xmm1,xmm2 | |
749 punpckhqdq xmm3,xmm7 | |
750 movdqu xmm4,[esi-128] | |
751 movdqu xmm5,[esi-64] | |
752 movdqu xmm2,[esi] | |
753 movdqu xmm7,[64+esi] | |
754 lea esi,[16+esi] | |
755 pxor xmm4,xmm0 | |
756 movdqa xmm0,[ebx] | |
757 pxor xmm5,xmm1 | |
758 movdqa xmm1,[16+ebx] | |
759 pxor xmm6,xmm2 | |
760 movdqa xmm2,[32+ebx] | |
761 pxor xmm7,xmm3 | |
762 movdqa xmm3,[48+ebx] | |
763 movdqu [edi-128],xmm4 | |
764 movdqu [edi-64],xmm5 | |
765 movdqu [edi],xmm6 | |
766 movdqu [64+edi],xmm7 | |
767 lea edi,[16+edi] | |
; Rows 8..11.
768 paddd xmm0,[ebp] | |
769 paddd xmm1,[16+ebp] | |
770 paddd xmm2,[32+ebp] | |
771 paddd xmm3,[48+ebp] | |
772 movdqa xmm6,xmm0 | |
773 punpckldq xmm0,xmm1 | |
774 movdqa xmm7,xmm2 | |
775 punpckldq xmm2,xmm3 | |
776 punpckhdq xmm6,xmm1 | |
777 punpckhdq xmm7,xmm3 | |
778 movdqa xmm1,xmm0 | |
779 punpcklqdq xmm0,xmm2 | |
780 movdqa xmm3,xmm6 | |
781 punpcklqdq xmm6,xmm7 | |
782 punpckhqdq xmm1,xmm2 | |
783 punpckhqdq xmm3,xmm7 | |
784 movdqu xmm4,[esi-128] | |
785 movdqu xmm5,[esi-64] | |
786 movdqu xmm2,[esi] | |
787 movdqu xmm7,[64+esi] | |
788 lea esi,[16+esi] | |
789 pxor xmm4,xmm0 | |
790 movdqa xmm0,[64+ebx] | |
791 pxor xmm5,xmm1 | |
792 movdqa xmm1,[80+ebx] | |
793 pxor xmm6,xmm2 | |
794 movdqa xmm2,[96+ebx] | |
795 pxor xmm7,xmm3 | |
796 movdqa xmm3,[112+ebx] | |
797 movdqu [edi-128],xmm4 | |
798 movdqu [edi-64],xmm5 | |
799 movdqu [edi],xmm6 | |
800 movdqu [64+edi],xmm7 | |
801 lea edi,[16+edi] | |
; Rows 12..15. The pointers then advance by 208, which together with the
; three +16 steps above moves them 256 bytes per batch.
802 paddd xmm0,[64+ebp] | |
803 paddd xmm1,[80+ebp] | |
804 paddd xmm2,[96+ebp] | |
805 paddd xmm3,[112+ebp] | |
806 movdqa xmm6,xmm0 | |
807 punpckldq xmm0,xmm1 | |
808 movdqa xmm7,xmm2 | |
809 punpckldq xmm2,xmm3 | |
810 punpckhdq xmm6,xmm1 | |
811 punpckhdq xmm7,xmm3 | |
812 movdqa xmm1,xmm0 | |
813 punpcklqdq xmm0,xmm2 | |
814 movdqa xmm3,xmm6 | |
815 punpcklqdq xmm6,xmm7 | |
816 punpckhqdq xmm1,xmm2 | |
817 punpckhqdq xmm3,xmm7 | |
818 movdqu xmm4,[esi-128] | |
819 movdqu xmm5,[esi-64] | |
820 movdqu xmm2,[esi] | |
821 movdqu xmm7,[64+esi] | |
822 lea esi,[208+esi] | |
823 pxor xmm4,xmm0 | |
824 pxor xmm5,xmm1 | |
825 pxor xmm6,xmm2 | |
826 pxor xmm7,xmm3 | |
827 movdqu [edi-128],xmm4 | |
828 movdqu [edi-64],xmm5 | |
829 movdqu [edi],xmm6 | |
830 movdqu [64+edi],xmm7 | |
831 lea edi,[208+edi] | |
; Another full 256 bytes available? If not, undo the bias on ECX and either
; finish (nothing left) or fall into the 1x code for the remainder.
832 sub ecx,256 | |
833 jnc NEAR L$009outer_loop | |
834 add ecx,256 | |
835 jz NEAR L$011done | |
; Hand-over to the 1x code: restore the arg5/arg4 pointers, remove the +128
; pointer bias, and rebuild XMM3 = words 12..15 with word 12 set to lane 0
; of the last batch's counter row plus 4 ([96+eax]); the mask at [112+eax]
; keeps words 13..15 from the original arg5 block.
836 mov ebx,DWORD [520+esp] | |
837 lea esi,[esi-128] | |
838 mov edx,DWORD [516+esp] | |
839 lea edi,[edi-128] | |
840 movd xmm2,DWORD [64+ebp] | |
841 movdqu xmm3,[ebx] | |
842 paddd xmm2,[96+eax] | |
843 pand xmm3,[112+eax] | |
844 por xmm3,xmm2 | |
; ---- 1x path: one 64-byte block per iteration, state rows in XMM0..XMM3 ----
; XMM0 = words 0..3 ([32+eax]), XMM1/XMM2 = the 32 bytes at arg4,
; XMM3 = words 12..15, XMM6/XMM7 = rotate-by-16 / rotate-by-8 pshufb masks.
845 L$0081x: | |
846 movdqa xmm0,[32+eax] | |
847 movdqu xmm1,[edx] | |
848 movdqu xmm2,[16+edx] | |
849 movdqa xmm6,[eax] | |
850 movdqa xmm7,[16+eax] | |
; NOTE(review): this dword store to [48+esp] is overwritten by the
; movdqa [48+esp],xmm3 four lines below before anything reads it; it looks
; like a harmless leftover -- confirm.
851 mov DWORD [48+esp],ebp | |
; Keep a copy of the input state at [0..63+esp] for the final addition.
852 movdqa [esp],xmm0 | |
853 movdqa [16+esp],xmm1 | |
854 movdqa [32+esp],xmm2 | |
855 movdqa [48+esp],xmm3 | |
856 mov edx,10 | |
857 jmp NEAR L$012loop1x | |
858 align 16 | |
; Next block: reload the saved state and add 1 ([80+eax]) to word 12.
859 L$013outer1x: | |
860 movdqa xmm3,[80+eax] | |
861 movdqa xmm0,[esp] | |
862 movdqa xmm1,[16+esp] | |
863 movdqa xmm2,[32+esp] | |
864 paddd xmm3,[48+esp] | |
865 mov edx,10 | |
866 movdqa [48+esp],xmm3 | |
867 jmp NEAR L$012loop1x | |
868 align 16 | |
; Round loop, EDX = 10 iterations. Each half runs the four quarter-rounds in
; parallel on rows XMM0..XMM3, then pshufd rotates rows 1..3 so the second
; half works on the diagonals; the closing pshufd triple rotates them back.
; The db sequences are hand-encoded pshufb: 102,15,56,0,222 = pshufb xmm3,xmm6
; (rotate by 16) and 102,15,56,0,223 = pshufb xmm3,xmm7 (rotate by 8).
869 L$012loop1x: | |
870 paddd xmm0,xmm1 | |
871 pxor xmm3,xmm0 | |
872 db 102,15,56,0,222 | |
873 paddd xmm2,xmm3 | |
874 pxor xmm1,xmm2 | |
875 movdqa xmm4,xmm1 | |
876 psrld xmm1,20 | |
877 pslld xmm4,12 | |
878 por xmm1,xmm4 | |
879 paddd xmm0,xmm1 | |
880 pxor xmm3,xmm0 | |
881 db 102,15,56,0,223 | |
882 paddd xmm2,xmm3 | |
883 pxor xmm1,xmm2 | |
884 movdqa xmm4,xmm1 | |
885 psrld xmm1,25 | |
886 pslld xmm4,7 | |
887 por xmm1,xmm4 | |
888 pshufd xmm2,xmm2,78 | |
889 pshufd xmm1,xmm1,57 | |
890 pshufd xmm3,xmm3,147 | |
891 nop | |
892 paddd xmm0,xmm1 | |
893 pxor xmm3,xmm0 | |
894 db 102,15,56,0,222 | |
895 paddd xmm2,xmm3 | |
896 pxor xmm1,xmm2 | |
897 movdqa xmm4,xmm1 | |
898 psrld xmm1,20 | |
899 pslld xmm4,12 | |
900 por xmm1,xmm4 | |
901 paddd xmm0,xmm1 | |
902 pxor xmm3,xmm0 | |
903 db 102,15,56,0,223 | |
904 paddd xmm2,xmm3 | |
905 pxor xmm1,xmm2 | |
906 movdqa xmm4,xmm1 | |
907 psrld xmm1,25 | |
908 pslld xmm4,7 | |
909 por xmm1,xmm4 | |
910 pshufd xmm2,xmm2,78 | |
911 pshufd xmm1,xmm1,147 | |
912 pshufd xmm3,xmm3,57 | |
913 dec edx | |
914 jnz NEAR L$012loop1x | |
; Add the saved input state to obtain the 64-byte keystream block.
915 paddd xmm0,[esp] | |
916 paddd xmm1,[16+esp] | |
917 paddd xmm2,[32+esp] | |
918 paddd xmm3,[48+esp] | |
919 cmp ecx,64 | |
920 jb NEAR L$014tail | |
; Full block: XOR 64 source bytes with the keystream, store, advance, and
; loop while bytes remain.
921 movdqu xmm4,[esi] | |
922 movdqu xmm5,[16+esi] | |
923 pxor xmm0,xmm4 | |
924 movdqu xmm4,[32+esi] | |
925 pxor xmm1,xmm5 | |
926 movdqu xmm5,[48+esi] | |
927 pxor xmm2,xmm4 | |
928 pxor xmm3,xmm5 | |
929 lea esi,[64+esi] | |
930 movdqu [edi],xmm0 | |
931 movdqu [16+edi],xmm1 | |
932 movdqu [32+edi],xmm2 | |
933 movdqu [48+edi],xmm3 | |
934 lea edi,[64+edi] | |
935 sub ecx,64 | |
936 jnz NEAR L$013outer1x | |
937 jmp NEAR L$011done | |
; Partial final block: spill the keystream to [0..63+esp] and XOR ECX bytes
; one at a time (EBP = byte index, incremented before the store, hence -1).
938 L$014tail: | |
939 movdqa [esp],xmm0 | |
940 movdqa [16+esp],xmm1 | |
941 movdqa [32+esp],xmm2 | |
942 movdqa [48+esp],xmm3 | |
943 xor eax,eax | |
944 xor edx,edx | |
945 xor ebp,ebp | |
946 L$015tail_loop: | |
947 mov al,BYTE [ebp*1+esp] | |
948 mov dl,BYTE [ebp*1+esi] | |
949 lea ebp,[1+ebp] | |
950 xor al,dl | |
951 mov BYTE [ebp*1+edi-1],al | |
952 dec ecx | |
953 jnz NEAR L$015tail_loop | |
; Restore the caller's ESP from the frame, pop the saved registers, return.
954 L$011done: | |
955 mov esp,DWORD [512+esp] | |
956 pop edi | |
957 pop esi | |
958 pop ebx | |
959 pop ebp | |
960 ret | |
; Constant table for the SSSE3 code, addressed relative to L$pic_point.
; Offsets from L$ssse3_data:
;   +0    pshufb mask: rotate each dword left by 16 bits
;   +16   pshufb mask: rotate each dword left by 8 bits
;   +32   state words 0..3 (same immediates as used in _ChaCha20_ctr32)
;   +48   per-lane counter offsets 0,1,2,3 for the 4x path
;   +64   counter step of 4 in every lane (4x path, per batch)
;   +80   counter step of 1 in the low dword (1x path, per block)
;   +96   counter step of 4 in the low dword (4x -> 1x hand-over)
;   +112  mask clearing the low dword and keeping the upper three
961 align 64 | |
962 L$ssse3_data: | |
963 db 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13 | |
964 db 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14 | |
965 dd 1634760805,857760878,2036477234,1797285236 | |
966 dd 0,1,2,3 | |
967 dd 4,4,4,4 | |
968 dd 1,0,0,0 | |
969 dd 4,0,0,0 | |
970 dd 0,-1,-1,-1 | |
971 align 64 | |
; NUL-terminated ASCII identification string:
; "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>"
972 db 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54 | |
973 db 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32 | |
974 db 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111 | |
975 db 114,103,62,0 | |
; 16-byte common symbol tested by _ChaCha20_ctr32 to choose the code path.
976 segment .bss | |
977 common _OPENSSL_ia32cap_P 16 | |
OLD | NEW |