OLD | NEW |
(Empty) | |
; Section preamble: selects the code-section directive that matches the
; active __OUTPUT_FORMAT__ (obj, win32, or any other format).
| 1 %ifidn __OUTPUT_FORMAT__,obj |
| 2 section code use32 class=code align=64 |
| 3 %elifidn __OUTPUT_FORMAT__,win32 |
; win32 only: when assembled by Yasm, reject versions below 1.1.0
; (01010000h); otherwise define $@feat.00 = 1 by hand.
| 4 %ifdef __YASM_VERSION_ID__ |
| 5 %if __YASM_VERSION_ID__ < 01010000h |
| 6 %error yasm version 1.1.0 or later needed. |
| 7 %endif |
| 8 ; Yasm automatically includes .00 and complains about redefining it. |
| 9 ; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html |
| 10 %else |
| 11 $@feat.00 equ 1 |
| 12 %endif |
| 13 section .text code align=64 |
; Fallback for every other output format: no explicit section alignment.
| 14 %else |
| 15 section .text code |
| 16 %endif |
; _ChaCha20_ctr32 -- exported entry point; all arguments are on the stack.
; After the four pushes below the five dword arguments are at
; [20+esp]..[36+esp]. As used by the code in this file:
;   [20+esp] pointer that receives the output bytes
;   [24+esp] pointer to the input bytes
;   [28+esp] byte length (0 => return immediately)
;   [32+esp] pointer to 32 bytes loaded into state words 4..11
;   [36+esp] pointer to 16 bytes loaded into state words 12..15
; ebp, ebx, esi and edi are saved here and restored before returning.
| 17 global _ChaCha20_ctr32 |
| 18 align 16 |
| 19 _ChaCha20_ctr32: |
| 20 L$_ChaCha20_ctr32_begin: |
| 21 push ebp |
| 22 push ebx |
| 23 push esi |
| 24 push edi |
; Zero length: nothing to do.
| 25 xor eax,eax |
| 26 cmp eax,DWORD [28+esp] |
| 27 je NEAR L$000no_data |
; call/pop leaves the run-time address of L$pic_point in eax; the SSSE3
; path uses it to locate L$ssse3_data.
| 28 call L$pic_point |
| 29 L$pic_point: |
| 30 pop eax |
; Dispatch on _OPENSSL_ia32cap_P: the SSSE3 path is taken only when bit 24
; of dword 0 and bit 9 of dword 1 are both set.
; NOTE(review): presumably the FXSR and SSSE3 feature bits -- the layout of
; the capability vector is defined outside this file; confirm.
| 31 lea ebp,[_OPENSSL_ia32cap_P] |
| 32 test DWORD [ebp],16777216 |
| 33 jz NEAR L$001x86 |
| 34 test DWORD [4+ebp],512 |
| 35 jz NEAR L$001x86 |
| 36 jmp NEAR L$ssse3_shortcut |
; Scalar (integer-register) path: build the frame.
; After `sub esp,132` the frame is used as follows:
;   [0..60+esp]    working state, 16 dwords
;   [80..108+esp]  copy of the 32 bytes at arg3 (state words 4..11)
;   [112..124+esp] copy of the 16 bytes at arg4 (state words 12..15)
;   [128+esp]      round-loop counter
;   [152/156/160+esp] the caller's arg0/arg1/arg2 slots (output, input, length)
| 37 L$001x86: |
| 38 mov esi,DWORD [32+esp] |
| 39 mov edi,DWORD [36+esp] |
| 40 sub esp,132 |
| 41 mov eax,DWORD [esi] |
| 42 mov ebx,DWORD [4+esi] |
| 43 mov ecx,DWORD [8+esi] |
| 44 mov edx,DWORD [12+esi] |
| 45 mov DWORD [80+esp],eax |
| 46 mov DWORD [84+esp],ebx |
| 47 mov DWORD [88+esp],ecx |
| 48 mov DWORD [92+esp],edx |
| 49 mov eax,DWORD [16+esi] |
| 50 mov ebx,DWORD [20+esi] |
| 51 mov ecx,DWORD [24+esi] |
| 52 mov edx,DWORD [28+esi] |
| 53 mov DWORD [96+esp],eax |
| 54 mov DWORD [100+esp],ebx |
| 55 mov DWORD [104+esp],ecx |
| 56 mov DWORD [108+esp],edx |
| 57 mov eax,DWORD [edi] |
| 58 mov ebx,DWORD [4+edi] |
| 59 mov ecx,DWORD [8+edi] |
| 60 mov edx,DWORD [12+edi] |
; Word 12 is stored minus one because L$002entry adds one before every
; block, so the first block uses the caller's value unchanged.
| 61 sub eax,1 |
| 62 mov DWORD [112+esp],eax |
| 63 mov DWORD [116+esp],ebx |
| 64 mov DWORD [120+esp],ecx |
| 65 mov DWORD [124+esp],edx |
| 66 jmp NEAR L$002entry |
; Start of one 64-byte block on the scalar path.
; L$003outer_loop (reached from the end of the previous block) first writes
; the advanced input pointer (ebx), output pointer (eax) and remaining
; length (ecx) back into the argument slots; the first block enters at
; L$002entry and skips that.
| 67 align 16 |
| 68 L$003outer_loop: |
| 69 mov DWORD [156+esp],ebx |
| 70 mov DWORD [152+esp],eax |
| 71 mov DWORD [160+esp],ecx |
; Initialise the working state. Words 0..3 are the four constants (word 0
; stays in eax); the other words come from the saved copies at [80..124+esp].
; Words 0, 4, 8, 9 and 12 are kept only in registers (eax, ebp, ecx, esi,
; edx) on entry to the round loop; the rest are written to [..+esp].
| 72 L$002entry: |
| 73 mov eax,1634760805 |
| 74 mov DWORD [4+esp],857760878 |
| 75 mov DWORD [8+esp],2036477234 |
| 76 mov DWORD [12+esp],1797285236 |
| 77 mov ebx,DWORD [84+esp] |
| 78 mov ebp,DWORD [88+esp] |
| 79 mov ecx,DWORD [104+esp] |
| 80 mov esi,DWORD [108+esp] |
| 81 mov edx,DWORD [116+esp] |
| 82 mov edi,DWORD [120+esp] |
| 83 mov DWORD [20+esp],ebx |
| 84 mov DWORD [24+esp],ebp |
| 85 mov DWORD [40+esp],ecx |
| 86 mov DWORD [44+esp],esi |
| 87 mov DWORD [52+esp],edx |
| 88 mov DWORD [56+esp],edi |
| 89 mov ebx,DWORD [92+esp] |
| 90 mov edi,DWORD [124+esp] |
| 91 mov edx,DWORD [112+esp] |
| 92 mov ebp,DWORD [80+esp] |
| 93 mov ecx,DWORD [96+esp] |
| 94 mov esi,DWORD [100+esp] |
; Advance word 12 (the block counter) and store it back for the next block.
| 95 add edx,1 |
| 96 mov DWORD [28+esp],ebx |
| 97 mov DWORD [60+esp],edi |
| 98 mov DWORD [112+esp],edx |
; ebx = number of round-loop iterations.
| 99 mov ebx,10 |
| 100 jmp NEAR L$004loop |
; Scalar round loop: 10 iterations (counter parked in [128+esp] while ebx is
; used as a temporary). Each iteration performs eight add/xor/rotate groups
; using left-rotations by 16, 12, 8 and 7, with the instructions of adjacent
; groups interleaved and state words spilled to / reloaded from [0..60+esp].
; The first four groups operate on state words (0,4,8,12), (1,5,9,13),
; (2,6,10,14), (3,7,11,15).
; On entry and on exit: eax = word 0, ebp = word 4, ecx = word 8,
; esi = word 9, edx = word 12; on exit edi additionally holds word 14.
| 101 align 16 |
| 102 L$004loop: |
| 103 add eax,ebp |
| 104 mov DWORD [128+esp],ebx |
| 105 mov ebx,ebp |
| 106 xor edx,eax |
| 107 rol edx,16 |
| 108 add ecx,edx |
| 109 xor ebx,ecx |
| 110 mov edi,DWORD [52+esp] |
| 111 rol ebx,12 |
| 112 mov ebp,DWORD [20+esp] |
| 113 add eax,ebx |
| 114 xor edx,eax |
| 115 mov DWORD [esp],eax |
| 116 rol edx,8 |
| 117 mov eax,DWORD [4+esp] |
| 118 add ecx,edx |
| 119 mov DWORD [48+esp],edx |
| 120 xor ebx,ecx |
| 121 add eax,ebp |
| 122 rol ebx,7 |
| 123 xor edi,eax |
| 124 mov DWORD [32+esp],ecx |
| 125 rol edi,16 |
| 126 mov DWORD [16+esp],ebx |
| 127 add esi,edi |
| 128 mov ecx,DWORD [40+esp] |
| 129 xor ebp,esi |
| 130 mov edx,DWORD [56+esp] |
| 131 rol ebp,12 |
| 132 mov ebx,DWORD [24+esp] |
| 133 add eax,ebp |
| 134 xor edi,eax |
| 135 mov DWORD [4+esp],eax |
| 136 rol edi,8 |
| 137 mov eax,DWORD [8+esp] |
| 138 add esi,edi |
| 139 mov DWORD [52+esp],edi |
| 140 xor ebp,esi |
| 141 add eax,ebx |
| 142 rol ebp,7 |
| 143 xor edx,eax |
| 144 mov DWORD [36+esp],esi |
| 145 rol edx,16 |
| 146 mov DWORD [20+esp],ebp |
| 147 add ecx,edx |
| 148 mov esi,DWORD [44+esp] |
| 149 xor ebx,ecx |
| 150 mov edi,DWORD [60+esp] |
| 151 rol ebx,12 |
| 152 mov ebp,DWORD [28+esp] |
| 153 add eax,ebx |
| 154 xor edx,eax |
| 155 mov DWORD [8+esp],eax |
| 156 rol edx,8 |
| 157 mov eax,DWORD [12+esp] |
| 158 add ecx,edx |
| 159 mov DWORD [56+esp],edx |
| 160 xor ebx,ecx |
| 161 add eax,ebp |
| 162 rol ebx,7 |
| 163 xor edi,eax |
| 164 rol edi,16 |
| 165 mov DWORD [24+esp],ebx |
| 166 add esi,edi |
| 167 xor ebp,esi |
| 168 rol ebp,12 |
| 169 mov ebx,DWORD [20+esp] |
| 170 add eax,ebp |
| 171 xor edi,eax |
| 172 mov DWORD [12+esp],eax |
| 173 rol edi,8 |
; From about here the second set of four groups begins (still interleaved
; with the tail of the previous one): words (0,5,10,15), (1,6,11,12),
; (2,7,8,13), (3,4,9,14).
| 174 mov eax,DWORD [esp] |
| 175 add esi,edi |
| 176 mov edx,edi |
| 177 xor ebp,esi |
| 178 add eax,ebx |
| 179 rol ebp,7 |
| 180 xor edx,eax |
| 181 rol edx,16 |
| 182 mov DWORD [28+esp],ebp |
| 183 add ecx,edx |
| 184 xor ebx,ecx |
| 185 mov edi,DWORD [48+esp] |
| 186 rol ebx,12 |
| 187 mov ebp,DWORD [24+esp] |
| 188 add eax,ebx |
| 189 xor edx,eax |
| 190 mov DWORD [esp],eax |
| 191 rol edx,8 |
| 192 mov eax,DWORD [4+esp] |
| 193 add ecx,edx |
| 194 mov DWORD [60+esp],edx |
| 195 xor ebx,ecx |
| 196 add eax,ebp |
| 197 rol ebx,7 |
| 198 xor edi,eax |
| 199 mov DWORD [40+esp],ecx |
| 200 rol edi,16 |
| 201 mov DWORD [20+esp],ebx |
| 202 add esi,edi |
| 203 mov ecx,DWORD [32+esp] |
| 204 xor ebp,esi |
| 205 mov edx,DWORD [52+esp] |
| 206 rol ebp,12 |
| 207 mov ebx,DWORD [28+esp] |
| 208 add eax,ebp |
| 209 xor edi,eax |
| 210 mov DWORD [4+esp],eax |
| 211 rol edi,8 |
| 212 mov eax,DWORD [8+esp] |
| 213 add esi,edi |
| 214 mov DWORD [48+esp],edi |
| 215 xor ebp,esi |
| 216 add eax,ebx |
| 217 rol ebp,7 |
| 218 xor edx,eax |
| 219 mov DWORD [44+esp],esi |
| 220 rol edx,16 |
| 221 mov DWORD [24+esp],ebp |
| 222 add ecx,edx |
| 223 mov esi,DWORD [36+esp] |
| 224 xor ebx,ecx |
| 225 mov edi,DWORD [56+esp] |
| 226 rol ebx,12 |
| 227 mov ebp,DWORD [16+esp] |
| 228 add eax,ebx |
| 229 xor edx,eax |
| 230 mov DWORD [8+esp],eax |
| 231 rol edx,8 |
| 232 mov eax,DWORD [12+esp] |
| 233 add ecx,edx |
| 234 mov DWORD [52+esp],edx |
| 235 xor ebx,ecx |
| 236 add eax,ebp |
| 237 rol ebx,7 |
| 238 xor edi,eax |
| 239 rol edi,16 |
| 240 mov DWORD [28+esp],ebx |
| 241 add esi,edi |
| 242 xor ebp,esi |
| 243 mov edx,DWORD [48+esp] |
| 244 rol ebp,12 |
; Reload the iteration counter that was parked at the top of the loop.
| 245 mov ebx,DWORD [128+esp] |
| 246 add eax,ebp |
| 247 xor edi,eax |
| 248 mov DWORD [12+esp],eax |
| 249 rol edi,8 |
| 250 mov eax,DWORD [esp] |
| 251 add esi,edi |
| 252 mov DWORD [56+esp],edi |
| 253 xor ebp,esi |
| 254 rol ebp,7 |
| 255 dec ebx |
| 256 jnz NEAR L$004loop |
; Scalar path, after the rounds: add the initial state back into the worked
; state and, when at least 64 bytes remain, XOR the result with 64 input
; bytes and store 64 output bytes.
; The words still in registers (0, 4, 8, 9, then 12 and 14) are handled
; first; the remaining words are reloaded from [..+esp] in groups of five.
| 257 mov ebx,DWORD [160+esp] |
| 258 add eax,1634760805 |
| 259 add ebp,DWORD [80+esp] |
| 260 add ecx,DWORD [96+esp] |
| 261 add esi,DWORD [100+esp] |
; ebx = remaining length; fewer than 64 bytes goes to the byte-wise tail.
| 262 cmp ebx,64 |
| 263 jb NEAR L$005tail |
; ebx = input pointer, eax = output pointer (word 0 is parked in [esp]
; meanwhile and written out last).
| 264 mov ebx,DWORD [156+esp] |
| 265 add edx,DWORD [112+esp] |
| 266 add edi,DWORD [120+esp] |
| 267 xor eax,DWORD [ebx] |
| 268 xor ebp,DWORD [16+ebx] |
| 269 mov DWORD [esp],eax |
| 270 mov eax,DWORD [152+esp] |
| 271 xor ecx,DWORD [32+ebx] |
| 272 xor esi,DWORD [36+ebx] |
| 273 xor edx,DWORD [48+ebx] |
| 274 xor edi,DWORD [56+ebx] |
| 275 mov DWORD [16+eax],ebp |
| 276 mov DWORD [32+eax],ecx |
| 277 mov DWORD [36+eax],esi |
| 278 mov DWORD [48+eax],edx |
| 279 mov DWORD [56+eax],edi |
; Words 1, 2, 3, 5, 6.
| 280 mov ebp,DWORD [4+esp] |
| 281 mov ecx,DWORD [8+esp] |
| 282 mov esi,DWORD [12+esp] |
| 283 mov edx,DWORD [20+esp] |
| 284 mov edi,DWORD [24+esp] |
| 285 add ebp,857760878 |
| 286 add ecx,2036477234 |
| 287 add esi,1797285236 |
| 288 add edx,DWORD [84+esp] |
| 289 add edi,DWORD [88+esp] |
| 290 xor ebp,DWORD [4+ebx] |
| 291 xor ecx,DWORD [8+ebx] |
| 292 xor esi,DWORD [12+ebx] |
| 293 xor edx,DWORD [20+ebx] |
| 294 xor edi,DWORD [24+ebx] |
| 295 mov DWORD [4+eax],ebp |
| 296 mov DWORD [8+eax],ecx |
| 297 mov DWORD [12+eax],esi |
| 298 mov DWORD [20+eax],edx |
| 299 mov DWORD [24+eax],edi |
; Words 7, 10, 11, 13, 15.
| 300 mov ebp,DWORD [28+esp] |
| 301 mov ecx,DWORD [40+esp] |
| 302 mov esi,DWORD [44+esp] |
| 303 mov edx,DWORD [52+esp] |
| 304 mov edi,DWORD [60+esp] |
| 305 add ebp,DWORD [92+esp] |
| 306 add ecx,DWORD [104+esp] |
| 307 add esi,DWORD [108+esp] |
| 308 add edx,DWORD [116+esp] |
| 309 add edi,DWORD [124+esp] |
| 310 xor ebp,DWORD [28+ebx] |
| 311 xor ecx,DWORD [40+ebx] |
| 312 xor esi,DWORD [44+ebx] |
| 313 xor edx,DWORD [52+ebx] |
| 314 xor edi,DWORD [60+ebx] |
; Advance input (ebx) and output (eax) by 64 and reduce the remaining
; length (ecx); L$003outer_loop stores all three back to the frame.
| 315 lea ebx,[64+ebx] |
| 316 mov DWORD [28+eax],ebp |
| 317 mov ebp,DWORD [esp] |
| 318 mov DWORD [40+eax],ecx |
| 319 mov ecx,DWORD [160+esp] |
| 320 mov DWORD [44+eax],esi |
| 321 mov DWORD [52+eax],edx |
| 322 mov DWORD [60+eax],edi |
| 323 mov DWORD [eax],ebp |
| 324 lea eax,[64+eax] |
| 325 sub ecx,64 |
| 326 jnz NEAR L$003outer_loop |
| 327 jmp NEAR L$006done |
; Scalar tail: fewer than 64 bytes remain (ebx = remaining length, loaded
; just before the branch here). Finish adding the initial state so that
; [0..60+esp] holds the complete 64-byte block, then XOR it into the
; input one byte at a time.
| 328 L$005tail: |
| 329 add edx,DWORD [112+esp] |
| 330 add edi,DWORD [120+esp] |
| 331 mov DWORD [esp],eax |
| 332 mov DWORD [16+esp],ebp |
| 333 mov DWORD [32+esp],ecx |
| 334 mov DWORD [36+esp],esi |
| 335 mov DWORD [48+esp],edx |
| 336 mov DWORD [56+esp],edi |
| 337 mov ebp,DWORD [4+esp] |
| 338 mov ecx,DWORD [8+esp] |
| 339 mov esi,DWORD [12+esp] |
| 340 mov edx,DWORD [20+esp] |
| 341 mov edi,DWORD [24+esp] |
| 342 add ebp,857760878 |
| 343 add ecx,2036477234 |
| 344 add esi,1797285236 |
| 345 add edx,DWORD [84+esp] |
| 346 add edi,DWORD [88+esp] |
| 347 mov DWORD [4+esp],ebp |
| 348 mov DWORD [8+esp],ecx |
| 349 mov DWORD [12+esp],esi |
| 350 mov DWORD [20+esp],edx |
| 351 mov DWORD [24+esp],edi |
| 352 mov ebp,DWORD [28+esp] |
| 353 mov ecx,DWORD [40+esp] |
| 354 mov esi,DWORD [44+esp] |
| 355 mov edx,DWORD [52+esp] |
| 356 mov edi,DWORD [60+esp] |
| 357 add ebp,DWORD [92+esp] |
| 358 add ecx,DWORD [104+esp] |
| 359 add esi,DWORD [108+esp] |
| 360 add edx,DWORD [116+esp] |
| 361 add edi,DWORD [124+esp] |
; From here: ebp = input pointer, ecx = output pointer, esi = byte index.
| 362 mov DWORD [28+esp],ebp |
| 363 mov ebp,DWORD [156+esp] |
| 364 mov DWORD [40+esp],ecx |
| 365 mov ecx,DWORD [152+esp] |
| 366 mov DWORD [44+esp],esi |
| 367 xor esi,esi |
| 368 mov DWORD [52+esp],edx |
| 369 mov DWORD [60+esp],edi |
| 370 xor eax,eax |
| 371 xor edx,edx |
; out[i] = in[i] XOR block[i], for ebx bytes. The index is incremented
; before the store, hence the -1 displacement on the store address.
| 372 L$007tail_loop: |
| 373 mov al,BYTE [ebp*1+esi] |
| 374 mov dl,BYTE [esi*1+esp] |
| 375 lea esi,[1+esi] |
| 376 xor al,dl |
| 377 mov BYTE [esi*1+ecx-1],al |
| 378 dec ebx |
| 379 jnz NEAR L$007tail_loop |
; Release the 132-byte frame, restore the saved registers and return.
; L$000no_data is also the target of the zero-length early exit.
| 380 L$006done: |
| 381 add esp,132 |
| 382 L$000no_data: |
| 383 pop edi |
| 384 pop esi |
| 385 pop ebx |
| 386 pop ebp |
| 387 ret |
;-----------------------------------------------------------------------
; _ChaCha20_ssse3 -- exported SSSE3 entry point.
; Stack arguments (same layout as _ChaCha20_ctr32), after the four pushes:
;   [20+esp] output pointer     [24+esp] input pointer
;   [28+esp] byte length        [32+esp] pointer to 32 bytes (state words 4..11)
;   [36+esp] pointer to 16 bytes (state words 12..15)
; Saves ebp/ebx/esi/edi; they are restored before returning.
;
; The body after L$ssse3_shortcut needs eax = address of L$pic_point (it
; computes the address of L$ssse3_data relative to it) and a non-zero
; length (its byte-wise tail loop is a do/while on the length).
; _ChaCha20_ctr32 guarantees both before jumping to L$ssse3_shortcut, but a
; direct call through this global symbol did not, so both are set up here.
; The L$ssse3_shortcut path is unaffected.
;-----------------------------------------------------------------------
global  _ChaCha20_ssse3
align   16
_ChaCha20_ssse3:
L$_ChaCha20_ssse3_begin:
        push    ebp
        push    ebx
        push    esi
        push    edi
        ; Zero length: leave through the shared epilogue, which pops exactly
        ; the four registers pushed above.
        cmp     DWORD [28+esp],0
        je      NEAR L$000no_data
        ; Direct entry: supply the base the constant-pool lookup expects.
        ; An absolute reference is fine here; this file already addresses
        ; _OPENSSL_ia32cap_P absolutely.
        mov     eax,L$pic_point
L$ssse3_shortcut:
; SSSE3 path, common setup.
; Register roles from here: edi = output pointer, esi = input pointer,
; ecx = byte length, edx = pointer to the 32 bytes for words 4..11,
; ebx = pointer to the 16 bytes for words 12..15.
| 397 mov edi,DWORD [20+esp] |
| 398 mov esi,DWORD [24+esp] |
| 399 mov ecx,DWORD [28+esp] |
| 400 mov edx,DWORD [32+esp] |
| 401 mov ebx,DWORD [36+esp] |
; Carve a 64-byte-aligned frame and remember the caller's esp at
; [512+esp]; the epilogue restores esp from that slot.
| 402 mov ebp,esp |
| 403 sub esp,524 |
| 404 and esp,-64 |
| 405 mov DWORD [512+esp],ebp |
; eax = address of L$ssse3_data. This expects eax to hold the address of
; L$pic_point on arrival.
| 406 lea eax,[(L$ssse3_data-L$pic_point)+eax] |
; xmm3 = words 12..15 (used directly by the single-block path).
| 407 movdqu xmm3,[ebx] |
; Fewer than 256 bytes: skip the 4-blocks-at-a-time code entirely.
| 408 cmp ecx,256 |
| 409 jb NEAR L$0081x |
; 4-way setup. The two source pointers are kept at [516+esp]/[520+esp] for
; the hand-off to the single-block path. ecx is biased by -256 so the loop
; can test the borrow after each subtraction.
| 410 mov DWORD [516+esp],edx |
| 411 mov DWORD [520+esp],ebx |
| 412 sub ecx,256 |
; ebp -> middle of a 256-byte table at [ebp-128..ebp+127] holding each of
; the 16 initial state words broadcast across four lanes (pshufd 0/85/170/255).
| 413 lea ebp,[384+esp] |
| 414 movdqu xmm7,[edx] |
| 415 pshufd xmm0,xmm3,0 |
| 416 pshufd xmm1,xmm3,85 |
| 417 pshufd xmm2,xmm3,170 |
| 418 pshufd xmm3,xmm3,255 |
; Word 12 lanes get +0,+1,+2,+3 and then -4 in every lane, because the
; outer loop adds 4 to every lane before each batch.
| 419 paddd xmm0,[48+eax] |
| 420 pshufd xmm4,xmm7,0 |
| 421 pshufd xmm5,xmm7,85 |
| 422 psubd xmm0,[64+eax] |
| 423 pshufd xmm6,xmm7,170 |
| 424 pshufd xmm7,xmm7,255 |
| 425 movdqa [64+ebp],xmm0 |
| 426 movdqa [80+ebp],xmm1 |
| 427 movdqa [96+ebp],xmm2 |
| 428 movdqa [112+ebp],xmm3 |
| 429 movdqu xmm3,[16+edx] |
| 430 movdqa [ebp-64],xmm4 |
| 431 movdqa [ebp-48],xmm5 |
| 432 movdqa [ebp-32],xmm6 |
| 433 movdqa [ebp-16],xmm7 |
; xmm7 = the four constants (words 0..3) from the constant pool.
| 434 movdqa xmm7,[32+eax] |
; ebx -> middle of the 256-byte working-state area at [ebx-128..ebx+127].
| 435 lea ebx,[128+esp] |
| 436 pshufd xmm0,xmm3,0 |
| 437 pshufd xmm1,xmm3,85 |
| 438 pshufd xmm2,xmm3,170 |
| 439 pshufd xmm3,xmm3,255 |
| 440 pshufd xmm4,xmm7,0 |
| 441 pshufd xmm5,xmm7,85 |
| 442 pshufd xmm6,xmm7,170 |
| 443 pshufd xmm7,xmm7,255 |
| 444 movdqa [ebp],xmm0 |
| 445 movdqa [16+ebp],xmm1 |
| 446 movdqa [32+ebp],xmm2 |
| 447 movdqa [48+ebp],xmm3 |
| 448 movdqa [ebp-128],xmm4 |
| 449 movdqa [ebp-112],xmm5 |
| 450 movdqa [ebp-96],xmm6 |
| 451 movdqa [ebp-80],xmm7 |
; Bias both data pointers by +128 so the four 64-byte blocks of a batch
; are reached with displacements -128, -64, 0 and +64.
| 452 lea esi,[128+esi] |
| 453 lea edi,[128+edi] |
| 454 jmp NEAR L$009outer_loop |
; 4-way outer loop: one pass produces 256 bytes (four 64-byte blocks).
; Copies the broadcast initial state from the ebp table into the ebx working
; area, except for words 0, 4, 8, 9 and 12, which enter the round loop in
; xmm0, xmm3, xmm4, xmm5 and xmm6.
| 455 align 16 |
| 456 L$009outer_loop: |
| 457 movdqa xmm1,[ebp-112] |
| 458 movdqa xmm2,[ebp-96] |
| 459 movdqa xmm3,[ebp-80] |
| 460 movdqa xmm5,[ebp-48] |
| 461 movdqa xmm6,[ebp-32] |
| 462 movdqa xmm7,[ebp-16] |
| 463 movdqa [ebx-112],xmm1 |
| 464 movdqa [ebx-96],xmm2 |
| 465 movdqa [ebx-80],xmm3 |
| 466 movdqa [ebx-48],xmm5 |
| 467 movdqa [ebx-32],xmm6 |
| 468 movdqa [ebx-16],xmm7 |
| 469 movdqa xmm2,[32+ebp] |
| 470 movdqa xmm3,[48+ebp] |
| 471 movdqa xmm4,[64+ebp] |
| 472 movdqa xmm5,[80+ebp] |
| 473 movdqa xmm6,[96+ebp] |
| 474 movdqa xmm7,[112+ebp] |
; Advance the four word-12 lanes by 4 and persist them in the ebp table.
| 475 paddd xmm4,[64+eax] |
| 476 movdqa [32+ebx],xmm2 |
| 477 movdqa [48+ebx],xmm3 |
| 478 movdqa [64+ebx],xmm4 |
| 479 movdqa [80+ebx],xmm5 |
| 480 movdqa [96+ebx],xmm6 |
| 481 movdqa [112+ebx],xmm7 |
| 482 movdqa [64+ebp],xmm4 |
| 483 movdqa xmm0,[ebp-128] |
| 484 movdqa xmm6,xmm4 |
| 485 movdqa xmm3,[ebp-64] |
| 486 movdqa xmm4,[ebp] |
| 487 movdqa xmm5,[16+ebp] |
; edx = number of round-loop iterations.
| 488 mov edx,10 |
| 489 nop |
; 4-way round loop: 10 iterations (edx). Every xmm register / 16-byte slot
; holds one state word for four blocks, so each paddd/pxor works on four
; blocks at once. State word i lives at [ebx-128+16*i] when not in a register.
; Rotations: by 16 and by 8 via pshufb with the masks at [eax] and [16+eax];
; by 12 and by 7 via pslld/psrld/por (12/20 and 7/25).
; As in the scalar path, the add/xor/rotate groups are interleaved with the
; spills and reloads of neighbouring groups.
| 490 align 16 |
| 491 L$010loop: |
| 492 paddd xmm0,xmm3 |
| 493 movdqa xmm2,xmm3 |
| 494 pxor xmm6,xmm0 |
| 495 pshufb xmm6,[eax] |
| 496 paddd xmm4,xmm6 |
| 497 pxor xmm2,xmm4 |
| 498 movdqa xmm3,[ebx-48] |
| 499 movdqa xmm1,xmm2 |
| 500 pslld xmm2,12 |
| 501 psrld xmm1,20 |
| 502 por xmm2,xmm1 |
| 503 movdqa xmm1,[ebx-112] |
| 504 paddd xmm0,xmm2 |
| 505 movdqa xmm7,[80+ebx] |
| 506 pxor xmm6,xmm0 |
| 507 movdqa [ebx-128],xmm0 |
| 508 pshufb xmm6,[16+eax] |
| 509 paddd xmm4,xmm6 |
| 510 movdqa [64+ebx],xmm6 |
| 511 pxor xmm2,xmm4 |
| 512 paddd xmm1,xmm3 |
| 513 movdqa xmm0,xmm2 |
| 514 pslld xmm2,7 |
| 515 psrld xmm0,25 |
| 516 pxor xmm7,xmm1 |
| 517 por xmm2,xmm0 |
| 518 movdqa [ebx],xmm4 |
| 519 pshufb xmm7,[eax] |
| 520 movdqa [ebx-64],xmm2 |
| 521 paddd xmm5,xmm7 |
| 522 movdqa xmm4,[32+ebx] |
| 523 pxor xmm3,xmm5 |
| 524 movdqa xmm2,[ebx-32] |
| 525 movdqa xmm0,xmm3 |
| 526 pslld xmm3,12 |
| 527 psrld xmm0,20 |
| 528 por xmm3,xmm0 |
| 529 movdqa xmm0,[ebx-96] |
| 530 paddd xmm1,xmm3 |
| 531 movdqa xmm6,[96+ebx] |
| 532 pxor xmm7,xmm1 |
| 533 movdqa [ebx-112],xmm1 |
| 534 pshufb xmm7,[16+eax] |
| 535 paddd xmm5,xmm7 |
| 536 movdqa [80+ebx],xmm7 |
| 537 pxor xmm3,xmm5 |
| 538 paddd xmm0,xmm2 |
| 539 movdqa xmm1,xmm3 |
| 540 pslld xmm3,7 |
| 541 psrld xmm1,25 |
| 542 pxor xmm6,xmm0 |
| 543 por xmm3,xmm1 |
| 544 movdqa [16+ebx],xmm5 |
| 545 pshufb xmm6,[eax] |
| 546 movdqa [ebx-48],xmm3 |
| 547 paddd xmm4,xmm6 |
| 548 movdqa xmm5,[48+ebx] |
| 549 pxor xmm2,xmm4 |
| 550 movdqa xmm3,[ebx-16] |
| 551 movdqa xmm1,xmm2 |
| 552 pslld xmm2,12 |
| 553 psrld xmm1,20 |
| 554 por xmm2,xmm1 |
| 555 movdqa xmm1,[ebx-80] |
| 556 paddd xmm0,xmm2 |
| 557 movdqa xmm7,[112+ebx] |
| 558 pxor xmm6,xmm0 |
| 559 movdqa [ebx-96],xmm0 |
| 560 pshufb xmm6,[16+eax] |
| 561 paddd xmm4,xmm6 |
| 562 movdqa [96+ebx],xmm6 |
| 563 pxor xmm2,xmm4 |
| 564 paddd xmm1,xmm3 |
| 565 movdqa xmm0,xmm2 |
| 566 pslld xmm2,7 |
| 567 psrld xmm0,25 |
| 568 pxor xmm7,xmm1 |
| 569 por xmm2,xmm0 |
| 570 pshufb xmm7,[eax] |
| 571 movdqa [ebx-32],xmm2 |
| 572 paddd xmm5,xmm7 |
| 573 pxor xmm3,xmm5 |
| 574 movdqa xmm2,[ebx-48] |
| 575 movdqa xmm0,xmm3 |
| 576 pslld xmm3,12 |
| 577 psrld xmm0,20 |
| 578 por xmm3,xmm0 |
| 579 movdqa xmm0,[ebx-128] |
| 580 paddd xmm1,xmm3 |
| 581 pxor xmm7,xmm1 |
| 582 movdqa [ebx-80],xmm1 |
| 583 pshufb xmm7,[16+eax] |
| 584 paddd xmm5,xmm7 |
| 585 movdqa xmm6,xmm7 |
| 586 pxor xmm3,xmm5 |
| 587 paddd xmm0,xmm2 |
| 588 movdqa xmm1,xmm3 |
| 589 pslld xmm3,7 |
| 590 psrld xmm1,25 |
| 591 pxor xmm6,xmm0 |
| 592 por xmm3,xmm1 |
| 593 pshufb xmm6,[eax] |
| 594 movdqa [ebx-16],xmm3 |
| 595 paddd xmm4,xmm6 |
| 596 pxor xmm2,xmm4 |
| 597 movdqa xmm3,[ebx-32] |
| 598 movdqa xmm1,xmm2 |
| 599 pslld xmm2,12 |
| 600 psrld xmm1,20 |
| 601 por xmm2,xmm1 |
| 602 movdqa xmm1,[ebx-112] |
| 603 paddd xmm0,xmm2 |
| 604 movdqa xmm7,[64+ebx] |
| 605 pxor xmm6,xmm0 |
| 606 movdqa [ebx-128],xmm0 |
| 607 pshufb xmm6,[16+eax] |
| 608 paddd xmm4,xmm6 |
| 609 movdqa [112+ebx],xmm6 |
| 610 pxor xmm2,xmm4 |
| 611 paddd xmm1,xmm3 |
| 612 movdqa xmm0,xmm2 |
| 613 pslld xmm2,7 |
| 614 psrld xmm0,25 |
| 615 pxor xmm7,xmm1 |
| 616 por xmm2,xmm0 |
| 617 movdqa [32+ebx],xmm4 |
| 618 pshufb xmm7,[eax] |
| 619 movdqa [ebx-48],xmm2 |
| 620 paddd xmm5,xmm7 |
| 621 movdqa xmm4,[ebx] |
| 622 pxor xmm3,xmm5 |
| 623 movdqa xmm2,[ebx-16] |
| 624 movdqa xmm0,xmm3 |
| 625 pslld xmm3,12 |
| 626 psrld xmm0,20 |
| 627 por xmm3,xmm0 |
| 628 movdqa xmm0,[ebx-96] |
| 629 paddd xmm1,xmm3 |
| 630 movdqa xmm6,[80+ebx] |
| 631 pxor xmm7,xmm1 |
| 632 movdqa [ebx-112],xmm1 |
| 633 pshufb xmm7,[16+eax] |
| 634 paddd xmm5,xmm7 |
| 635 movdqa [64+ebx],xmm7 |
| 636 pxor xmm3,xmm5 |
| 637 paddd xmm0,xmm2 |
| 638 movdqa xmm1,xmm3 |
| 639 pslld xmm3,7 |
| 640 psrld xmm1,25 |
| 641 pxor xmm6,xmm0 |
| 642 por xmm3,xmm1 |
| 643 movdqa [48+ebx],xmm5 |
| 644 pshufb xmm6,[eax] |
| 645 movdqa [ebx-32],xmm3 |
| 646 paddd xmm4,xmm6 |
| 647 movdqa xmm5,[16+ebx] |
| 648 pxor xmm2,xmm4 |
| 649 movdqa xmm3,[ebx-64] |
| 650 movdqa xmm1,xmm2 |
| 651 pslld xmm2,12 |
| 652 psrld xmm1,20 |
| 653 por xmm2,xmm1 |
| 654 movdqa xmm1,[ebx-80] |
| 655 paddd xmm0,xmm2 |
| 656 movdqa xmm7,[96+ebx] |
| 657 pxor xmm6,xmm0 |
| 658 movdqa [ebx-96],xmm0 |
| 659 pshufb xmm6,[16+eax] |
| 660 paddd xmm4,xmm6 |
| 661 movdqa [80+ebx],xmm6 |
| 662 pxor xmm2,xmm4 |
| 663 paddd xmm1,xmm3 |
| 664 movdqa xmm0,xmm2 |
| 665 pslld xmm2,7 |
| 666 psrld xmm0,25 |
| 667 pxor xmm7,xmm1 |
| 668 por xmm2,xmm0 |
| 669 pshufb xmm7,[eax] |
| 670 movdqa [ebx-16],xmm2 |
| 671 paddd xmm5,xmm7 |
| 672 pxor xmm3,xmm5 |
| 673 movdqa xmm0,xmm3 |
| 674 pslld xmm3,12 |
| 675 psrld xmm0,20 |
| 676 por xmm3,xmm0 |
| 677 movdqa xmm0,[ebx-128] |
| 678 paddd xmm1,xmm3 |
| 679 movdqa xmm6,[64+ebx] |
| 680 pxor xmm7,xmm1 |
| 681 movdqa [ebx-80],xmm1 |
| 682 pshufb xmm7,[16+eax] |
| 683 paddd xmm5,xmm7 |
| 684 movdqa [96+ebx],xmm7 |
| 685 pxor xmm3,xmm5 |
| 686 movdqa xmm1,xmm3 |
| 687 pslld xmm3,7 |
| 688 psrld xmm1,25 |
| 689 por xmm3,xmm1 |
| 690 dec edx |
| 691 jnz NEAR L$010loop |
; 4-way output. Spill the state words still in registers, then handle the
; state four words at a time (0..3, 4..7, 8..11, 12..15):
;   1. add the broadcast initial state from the ebp table;
;   2. transpose the 4x4 dword matrix (punpck{l,h}dq + punpck{l,h}qdq) so
;      each register holds 16 consecutive bytes of ONE block;
;   3. XOR with the input at esi-128 / esi-64 / esi / esi+64 (blocks 0..3)
;      and store to the same offsets from edi.
; esi/edi advance by 16 after each of the first three groups and by 208
; after the last, i.e. 256 bytes per pass.
| 692 movdqa [ebx-64],xmm3 |
| 693 movdqa [ebx],xmm4 |
| 694 movdqa [16+ebx],xmm5 |
| 695 movdqa [64+ebx],xmm6 |
| 696 movdqa [96+ebx],xmm7 |
; --- words 0..3 ---
| 697 movdqa xmm1,[ebx-112] |
| 698 movdqa xmm2,[ebx-96] |
| 699 movdqa xmm3,[ebx-80] |
| 700 paddd xmm0,[ebp-128] |
| 701 paddd xmm1,[ebp-112] |
| 702 paddd xmm2,[ebp-96] |
| 703 paddd xmm3,[ebp-80] |
| 704 movdqa xmm6,xmm0 |
| 705 punpckldq xmm0,xmm1 |
| 706 movdqa xmm7,xmm2 |
| 707 punpckldq xmm2,xmm3 |
| 708 punpckhdq xmm6,xmm1 |
| 709 punpckhdq xmm7,xmm3 |
| 710 movdqa xmm1,xmm0 |
| 711 punpcklqdq xmm0,xmm2 |
| 712 movdqa xmm3,xmm6 |
| 713 punpcklqdq xmm6,xmm7 |
| 714 punpckhqdq xmm1,xmm2 |
| 715 punpckhqdq xmm3,xmm7 |
| 716 movdqu xmm4,[esi-128] |
| 717 movdqu xmm5,[esi-64] |
| 718 movdqu xmm2,[esi] |
| 719 movdqu xmm7,[64+esi] |
| 720 lea esi,[16+esi] |
; The loads of the next group's words are interleaved with the XORs.
| 721 pxor xmm4,xmm0 |
| 722 movdqa xmm0,[ebx-64] |
| 723 pxor xmm5,xmm1 |
| 724 movdqa xmm1,[ebx-48] |
| 725 pxor xmm6,xmm2 |
| 726 movdqa xmm2,[ebx-32] |
| 727 pxor xmm7,xmm3 |
| 728 movdqa xmm3,[ebx-16] |
| 729 movdqu [edi-128],xmm4 |
| 730 movdqu [edi-64],xmm5 |
| 731 movdqu [edi],xmm6 |
| 732 movdqu [64+edi],xmm7 |
| 733 lea edi,[16+edi] |
; --- words 4..7 ---
| 734 paddd xmm0,[ebp-64] |
| 735 paddd xmm1,[ebp-48] |
| 736 paddd xmm2,[ebp-32] |
| 737 paddd xmm3,[ebp-16] |
| 738 movdqa xmm6,xmm0 |
| 739 punpckldq xmm0,xmm1 |
| 740 movdqa xmm7,xmm2 |
| 741 punpckldq xmm2,xmm3 |
| 742 punpckhdq xmm6,xmm1 |
| 743 punpckhdq xmm7,xmm3 |
| 744 movdqa xmm1,xmm0 |
| 745 punpcklqdq xmm0,xmm2 |
| 746 movdqa xmm3,xmm6 |
| 747 punpcklqdq xmm6,xmm7 |
| 748 punpckhqdq xmm1,xmm2 |
| 749 punpckhqdq xmm3,xmm7 |
| 750 movdqu xmm4,[esi-128] |
| 751 movdqu xmm5,[esi-64] |
| 752 movdqu xmm2,[esi] |
| 753 movdqu xmm7,[64+esi] |
| 754 lea esi,[16+esi] |
| 755 pxor xmm4,xmm0 |
| 756 movdqa xmm0,[ebx] |
| 757 pxor xmm5,xmm1 |
| 758 movdqa xmm1,[16+ebx] |
| 759 pxor xmm6,xmm2 |
| 760 movdqa xmm2,[32+ebx] |
| 761 pxor xmm7,xmm3 |
| 762 movdqa xmm3,[48+ebx] |
| 763 movdqu [edi-128],xmm4 |
| 764 movdqu [edi-64],xmm5 |
| 765 movdqu [edi],xmm6 |
| 766 movdqu [64+edi],xmm7 |
| 767 lea edi,[16+edi] |
; --- words 8..11 ---
| 768 paddd xmm0,[ebp] |
| 769 paddd xmm1,[16+ebp] |
| 770 paddd xmm2,[32+ebp] |
| 771 paddd xmm3,[48+ebp] |
| 772 movdqa xmm6,xmm0 |
| 773 punpckldq xmm0,xmm1 |
| 774 movdqa xmm7,xmm2 |
| 775 punpckldq xmm2,xmm3 |
| 776 punpckhdq xmm6,xmm1 |
| 777 punpckhdq xmm7,xmm3 |
| 778 movdqa xmm1,xmm0 |
| 779 punpcklqdq xmm0,xmm2 |
| 780 movdqa xmm3,xmm6 |
| 781 punpcklqdq xmm6,xmm7 |
| 782 punpckhqdq xmm1,xmm2 |
| 783 punpckhqdq xmm3,xmm7 |
| 784 movdqu xmm4,[esi-128] |
| 785 movdqu xmm5,[esi-64] |
| 786 movdqu xmm2,[esi] |
| 787 movdqu xmm7,[64+esi] |
| 788 lea esi,[16+esi] |
| 789 pxor xmm4,xmm0 |
| 790 movdqa xmm0,[64+ebx] |
| 791 pxor xmm5,xmm1 |
| 792 movdqa xmm1,[80+ebx] |
| 793 pxor xmm6,xmm2 |
| 794 movdqa xmm2,[96+ebx] |
| 795 pxor xmm7,xmm3 |
| 796 movdqa xmm3,[112+ebx] |
| 797 movdqu [edi-128],xmm4 |
| 798 movdqu [edi-64],xmm5 |
| 799 movdqu [edi],xmm6 |
| 800 movdqu [64+edi],xmm7 |
| 801 lea edi,[16+edi] |
; --- words 12..15 ---
| 802 paddd xmm0,[64+ebp] |
| 803 paddd xmm1,[80+ebp] |
| 804 paddd xmm2,[96+ebp] |
| 805 paddd xmm3,[112+ebp] |
| 806 movdqa xmm6,xmm0 |
| 807 punpckldq xmm0,xmm1 |
| 808 movdqa xmm7,xmm2 |
| 809 punpckldq xmm2,xmm3 |
| 810 punpckhdq xmm6,xmm1 |
| 811 punpckhdq xmm7,xmm3 |
| 812 movdqa xmm1,xmm0 |
| 813 punpcklqdq xmm0,xmm2 |
| 814 movdqa xmm3,xmm6 |
| 815 punpcklqdq xmm6,xmm7 |
| 816 punpckhqdq xmm1,xmm2 |
| 817 punpckhqdq xmm3,xmm7 |
| 818 movdqu xmm4,[esi-128] |
| 819 movdqu xmm5,[esi-64] |
| 820 movdqu xmm2,[esi] |
| 821 movdqu xmm7,[64+esi] |
| 822 lea esi,[208+esi] |
| 823 pxor xmm4,xmm0 |
| 824 pxor xmm5,xmm1 |
| 825 pxor xmm6,xmm2 |
| 826 pxor xmm7,xmm3 |
| 827 movdqu [edi-128],xmm4 |
| 828 movdqu [edi-64],xmm5 |
| 829 movdqu [edi],xmm6 |
| 830 movdqu [64+edi],xmm7 |
| 831 lea edi,[208+edi] |
; ecx was biased by -256 on entry: no borrow means another full 256-byte
; pass fits. Otherwise undo the bias; zero left means finished.
| 832 sub ecx,256 |
| 833 jnc NEAR L$009outer_loop |
| 834 add ecx,256 |
| 835 jz NEAR L$011done |
; 1..255 bytes remain: fall into the single-block path. Restore the two
; source pointers, undo the +128 bias on esi/edi, and rebuild xmm3 as the
; original 16-byte block with dword 0 masked off and replaced by lane 0 of
; the last batch's word 12 plus 4.
| 836 mov ebx,DWORD [520+esp] |
| 837 lea esi,[esi-128] |
| 838 mov edx,DWORD [516+esp] |
| 839 lea edi,[edi-128] |
| 840 movd xmm2,DWORD [64+ebp] |
| 841 movdqu xmm3,[ebx] |
| 842 paddd xmm2,[96+eax] |
| 843 pand xmm3,[112+eax] |
| 844 por xmm3,xmm2 |
; Single-block SSSE3 path (one 64-byte block per pass).
; xmm0..xmm3 hold the four 16-byte rows of the state; xmm6/xmm7 hold the two
; pshufb rotation masks from the constant pool. The initial state is kept at
; [0..63+esp] for the post-round addition.
; Expects xmm3 = words 12..15 on arrival.
| 845 L$0081x: |
| 846 movdqa xmm0,[32+eax] |
| 847 movdqu xmm1,[edx] |
| 848 movdqu xmm2,[16+edx] |
| 849 movdqa xmm6,[eax] |
| 850 movdqa xmm7,[16+eax] |
; NOTE(review): this dword store is overwritten by the movdqa to [48+esp]
; four lines below, so it has no lasting effect.
| 851 mov DWORD [48+esp],ebp |
| 852 movdqa [esp],xmm0 |
| 853 movdqa [16+esp],xmm1 |
| 854 movdqa [32+esp],xmm2 |
| 855 movdqa [48+esp],xmm3 |
| 856 mov edx,10 |
| 857 jmp NEAR L$012loop1x |
; Subsequent blocks: reload rows 0..2 from the frame and add 1 to dword 0 of
; the saved row 3 (the block counter) before running the rounds again.
| 858 align 16 |
| 859 L$013outer1x: |
| 860 movdqa xmm3,[80+eax] |
| 861 movdqa xmm0,[esp] |
| 862 movdqa xmm1,[16+esp] |
| 863 movdqa xmm2,[32+esp] |
| 864 paddd xmm3,[48+esp] |
| 865 mov edx,10 |
| 866 movdqa [48+esp],xmm3 |
| 867 jmp NEAR L$012loop1x |
; Single-block round loop: 10 iterations (edx), two half-passes each.
; Rows: xmm0, xmm1, xmm2, xmm3; xmm4 is scratch.
; The `db 102,15,56,0,222` / `...,223` byte sequences are hand-encoded
; `pshufb xmm3,xmm6` / `pshufb xmm3,xmm7` (rotate each dword by 16 / by 8
; using the masks loaded into xmm6/xmm7). Rotations by 12 and 7 use
; psrld/pslld/por.
; Between the half-passes the pshufd instructions rotate the lanes of rows
; 1..3 so the second half-pass works on the diagonals; the pshufd set at the
; end of the iteration rotates them back.
| 868 align 16 |
| 869 L$012loop1x: |
| 870 paddd xmm0,xmm1 |
| 871 pxor xmm3,xmm0 |
| 872 db 102,15,56,0,222 |
| 873 paddd xmm2,xmm3 |
| 874 pxor xmm1,xmm2 |
| 875 movdqa xmm4,xmm1 |
| 876 psrld xmm1,20 |
| 877 pslld xmm4,12 |
| 878 por xmm1,xmm4 |
| 879 paddd xmm0,xmm1 |
| 880 pxor xmm3,xmm0 |
| 881 db 102,15,56,0,223 |
| 882 paddd xmm2,xmm3 |
| 883 pxor xmm1,xmm2 |
| 884 movdqa xmm4,xmm1 |
| 885 psrld xmm1,25 |
| 886 pslld xmm4,7 |
| 887 por xmm1,xmm4 |
| 888 pshufd xmm2,xmm2,78 |
| 889 pshufd xmm1,xmm1,57 |
| 890 pshufd xmm3,xmm3,147 |
| 891 nop |
| 892 paddd xmm0,xmm1 |
| 893 pxor xmm3,xmm0 |
| 894 db 102,15,56,0,222 |
| 895 paddd xmm2,xmm3 |
| 896 pxor xmm1,xmm2 |
| 897 movdqa xmm4,xmm1 |
| 898 psrld xmm1,20 |
| 899 pslld xmm4,12 |
| 900 por xmm1,xmm4 |
| 901 paddd xmm0,xmm1 |
| 902 pxor xmm3,xmm0 |
| 903 db 102,15,56,0,223 |
| 904 paddd xmm2,xmm3 |
| 905 pxor xmm1,xmm2 |
| 906 movdqa xmm4,xmm1 |
| 907 psrld xmm1,25 |
| 908 pslld xmm4,7 |
| 909 por xmm1,xmm4 |
| 910 pshufd xmm2,xmm2,78 |
| 911 pshufd xmm1,xmm1,147 |
| 912 pshufd xmm3,xmm3,57 |
| 913 dec edx |
| 914 jnz NEAR L$012loop1x |
; Single-block output: add the saved initial state to the worked rows to
; obtain the 64-byte block in xmm0..xmm3.
| 915 paddd xmm0,[esp] |
| 916 paddd xmm1,[16+esp] |
| 917 paddd xmm2,[32+esp] |
| 918 paddd xmm3,[48+esp] |
; Fewer than 64 bytes left: use the byte-wise tail below.
| 919 cmp ecx,64 |
| 920 jb NEAR L$014tail |
; Full block: out[0..63] = in[0..63] XOR block; advance both pointers by
; 64 and loop while bytes remain.
| 921 movdqu xmm4,[esi] |
| 922 movdqu xmm5,[16+esi] |
| 923 pxor xmm0,xmm4 |
| 924 movdqu xmm4,[32+esi] |
| 925 pxor xmm1,xmm5 |
| 926 movdqu xmm5,[48+esi] |
| 927 pxor xmm2,xmm4 |
| 928 pxor xmm3,xmm5 |
| 929 lea esi,[64+esi] |
| 930 movdqu [edi],xmm0 |
| 931 movdqu [16+edi],xmm1 |
| 932 movdqu [32+edi],xmm2 |
| 933 movdqu [48+edi],xmm3 |
| 934 lea edi,[64+edi] |
| 935 sub ecx,64 |
| 936 jnz NEAR L$013outer1x |
| 937 jmp NEAR L$011done |
; Tail: spill the block to [0..63+esp] (overwriting the saved initial
; state, which is no longer needed) and XOR ecx bytes one at a time.
; ebp = byte index; it is incremented before the store, hence the -1
; displacement. The loop is a do/while, so ecx must be non-zero here.
| 938 L$014tail: |
| 939 movdqa [esp],xmm0 |
| 940 movdqa [16+esp],xmm1 |
| 941 movdqa [32+esp],xmm2 |
| 942 movdqa [48+esp],xmm3 |
| 943 xor eax,eax |
| 944 xor edx,edx |
| 945 xor ebp,ebp |
| 946 L$015tail_loop: |
| 947 mov al,BYTE [ebp*1+esp] |
| 948 mov dl,BYTE [ebp*1+esi] |
| 949 lea ebp,[1+ebp] |
| 950 xor al,dl |
| 951 mov BYTE [ebp*1+edi-1],al |
| 952 dec ecx |
| 953 jnz NEAR L$015tail_loop |
; Epilogue: restore the caller's esp saved at [512+esp] during setup, then
; the four saved registers.
| 954 L$011done: |
| 955 mov esp,DWORD [512+esp] |
| 956 pop edi |
| 957 pop esi |
| 958 pop ebx |
| 959 pop ebp |
| 960 ret |
;-----------------------------------------------------------------------
; Constant pool for the SSSE3 code paths, addressed as [offset+eax] once
; eax holds the address of L$ssse3_data. Offsets are in bytes.
;-----------------------------------------------------------------------
align   64
L$ssse3_data:
        ; +0   pshufb mask: rotate every dword left by 16 bits
        db      0x02,0x03,0x00,0x01, 0x06,0x07,0x04,0x05
        db      0x0A,0x0B,0x08,0x09, 0x0E,0x0F,0x0C,0x0D
        ; +16  pshufb mask: rotate every dword left by 8 bits
        db      0x03,0x00,0x01,0x02, 0x07,0x04,0x05,0x06
        db      0x0B,0x08,0x09,0x0A, 0x0F,0x0C,0x0D,0x0E
        ; +32  state words 0..3 (the same four constants the scalar path
        ;      uses as immediates)
        dd      0x61707865, 0x3320646E, 0x79622D32, 0x6B206574
        ; +48  per-lane counter offsets for the 4-way path
        dd      0, 1, 2, 3
        ; +64  counter step per 4-way pass, all lanes
        dd      4, 4, 4, 4
        ; +80  counter step per single-block pass (dword 0 only)
        dd      1, 0, 0, 0
        ; +96  counter step when handing off from 4-way to single-block
        dd      4, 0, 0, 0
        ; +112 mask that clears dword 0 and keeps dwords 1..3
        dd      0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF
align   64
        ; NUL-terminated identification string embedded in the object.
        db      "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>", 0
; 16-byte common symbol read by the dispatch code in _ChaCha20_ctr32
; (dwords 0 and 1 are tested there). Declared `common`, so a definition in
; another object file can be merged with it at link time.
| 976 segment .bss |
| 977 common _OPENSSL_ia32cap_P 16 |
OLD | NEW |