OLD | NEW |
(Empty) | |
; Select the code section according to the assembler's output format:
;   obj   : 32-bit section "code", class=code, aligned to 64 bytes.
;   win32 : ".text" aligned to 64 bytes (plus the yasm / @feat.00 handling below).
;   other : plain ".text code" with no explicit alignment.
| 1 %ifidn __OUTPUT_FORMAT__,obj |
| 2 section code use32 class=code align=64 |
| 3 %elifidn __OUTPUT_FORMAT__,win32 |
; NOTE(review): __YASM_VERSION_ID__ is presumably defined only when yasm is the
; assembler. Versions below 01010000h (1.1.0) are rejected with an error.
| 4 %ifdef __YASM_VERSION_ID__ |
| 5 %if __YASM_VERSION_ID__ < 01010000h |
| 6 %error yasm version 1.1.0 or later needed. |
| 7 %endif |
| 8 ; Yasm automatically includes .00 and complains about redefining it. |
| 9 ; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html |
| 10 %else |
; Not yasm: define the @feat.00 symbol explicitly.
; NOTE(review): per the URL above this looks like the win32 SafeSEH marker - confirm.
| 11 $@feat.00 equ 1 |
| 12 %endif |
| 13 section .text code align=64 |
| 14 %else |
| 15 section .text code |
| 16 %endif |
;-----------------------------------------------------------------------
; _gcm_gmult_4bit_x86 - table-driven multiply of one 16-byte block using
; only general-purpose registers (no MMX/SSE).
;
; Arguments are read from the stack (32-bit stack-argument convention):
;   arg1 : pointer to a 16-byte block; read on entry, overwritten with
;          the result on exit.
;   arg2 : pointer to a table of 16-byte entries, addressed as
;          table + (nibble * 16).
; NOTE(review): presumably arg1 is the GHASH state and arg2 the
; precomputed per-key table - confirm against the C prototype.
;
; Preserves ebp, ebx, esi, edi (pushed/popped); eax, ecx, edx and flags
; are clobbered. Uses 84 bytes of stack:
;   [esp+0  .. esp+15] : copy of the input block
;   [esp+16 .. esp+79] : 16-dword reduction table
;-----------------------------------------------------------------------
| 17 global _gcm_gmult_4bit_x86 |
| 18 align 16 |
| 19 _gcm_gmult_4bit_x86: |
| 20 L$_gcm_gmult_4bit_x86_begin: |
| 21 push ebp |
| 22 push ebx |
| 23 push esi |
| 24 push edi |
| 25 sub esp,84 |
; 4 pushes (16) + 84 locals + 4 return address = 104, so [104+esp] is arg1
; and [108+esp] is arg2.
| 26 mov edi,DWORD [104+esp] |
| 27 mov esi,DWORD [108+esp] |
; Load the 16-byte block at arg1 as four dwords.
| 28 mov ebp,DWORD [edi] |
| 29 mov edx,DWORD [4+edi] |
| 30 mov ecx,DWORD [8+edi] |
| 31 mov ebx,DWORD [12+edi] |
; Build the 16-entry dword reduction table on the stack. The values are the
; same as the second dword of each 8-byte entry in L$rem_4bit.
| 32 mov DWORD [16+esp],0 |
| 33 mov DWORD [20+esp],471859200 |
| 34 mov DWORD [24+esp],943718400 |
| 35 mov DWORD [28+esp],610271232 |
| 36 mov DWORD [32+esp],1887436800 |
| 37 mov DWORD [36+esp],1822425088 |
| 38 mov DWORD [40+esp],1220542464 |
| 39 mov DWORD [44+esp],1423966208 |
| 40 mov DWORD [48+esp],3774873600 |
| 41 mov DWORD [52+esp],4246732800 |
| 42 mov DWORD [56+esp],3644850176 |
| 43 mov DWORD [60+esp],3311403008 |
| 44 mov DWORD [64+esp],2441084928 |
| 45 mov DWORD [68+esp],2376073216 |
| 46 mov DWORD [72+esp],2847932416 |
| 47 mov DWORD [76+esp],3051356160 |
; Keep a byte-addressable copy of the input block at [esp+0..15].
| 48 mov DWORD [esp],ebp |
| 49 mov DWORD [4+esp],edx |
| 50 mov DWORD [8+esp],ecx |
| 51 mov DWORD [12+esp],ebx |
; ebx = (low nibble of input byte 15) * 16: bits 24..27 of the last dword
; moved down to bits 4..7. This is the byte offset of the first table entry.
| 52 shr ebx,20 |
| 53 and ebx,240 |
; Seed the 128-bit accumulator from that table entry. The accumulator is
; ebp:edx:ecx:ebx (most- to least-significant, as implied by the shrd chain).
| 54 mov ebp,DWORD [4+ebx*1+esi] |
| 55 mov edx,DWORD [ebx*1+esi] |
| 56 mov ecx,DWORD [12+ebx*1+esi] |
| 57 mov ebx,DWORD [8+ebx*1+esi] |
; eax is cleared once; afterwards only al is written, so eax stays a
; zero-extended index. edi = index of the input byte being consumed.
| 58 xor eax,eax |
| 59 mov edi,15 |
| 60 jmp NEAR L$000x86_loop |
| 61 align 16 |
| 62 L$000x86_loop: |
; Half-step A: al = nibble about to be shifted out; shift the accumulator
; right by 4; fold that nibble back into the top dword via the stack table;
; then xor in the table entry for the HIGH nibble of input byte [edi].
| 63 mov al,bl |
| 64 shrd ebx,ecx,4 |
| 65 and al,15 |
| 66 shrd ecx,edx,4 |
| 67 shrd edx,ebp,4 |
| 68 shr ebp,4 |
| 69 xor ebp,DWORD [16+eax*4+esp] |
| 70 mov al,BYTE [edi*1+esp] |
| 71 and al,240 |
| 72 xor ebx,DWORD [8+eax*1+esi] |
| 73 xor ecx,DWORD [12+eax*1+esi] |
| 74 xor edx,DWORD [eax*1+esi] |
| 75 xor ebp,DWORD [4+eax*1+esi] |
; Move to the previous byte; exit once byte 0's high nibble has been used
; (edi becomes negative).
| 76 dec edi |
| 77 js NEAR L$001x86_break |
; Half-step B: same shift/reduce, then xor in the entry for the LOW nibble
; (shl al,4) of the new input byte [edi].
| 78 mov al,bl |
| 79 shrd ebx,ecx,4 |
| 80 and al,15 |
| 81 shrd ecx,edx,4 |
| 82 shrd edx,ebp,4 |
| 83 shr ebp,4 |
| 84 xor ebp,DWORD [16+eax*4+esp] |
| 85 mov al,BYTE [edi*1+esp] |
| 86 shl al,4 |
| 87 xor ebx,DWORD [8+eax*1+esi] |
| 88 xor ecx,DWORD [12+eax*1+esi] |
| 89 xor edx,DWORD [eax*1+esi] |
| 90 xor ebp,DWORD [4+eax*1+esi] |
| 91 jmp NEAR L$000x86_loop |
| 92 align 16 |
| 93 L$001x86_break: |
; Byte-swap each dword and write the result back over the block at arg1.
| 94 bswap ebx |
| 95 bswap ecx |
| 96 bswap edx |
| 97 bswap ebp |
| 98 mov edi,DWORD [104+esp] |
| 99 mov DWORD [12+edi],ebx |
| 100 mov DWORD [8+edi],ecx |
| 101 mov DWORD [4+edi],edx |
| 102 mov DWORD [edi],ebp |
| 103 add esp,84 |
| 104 pop edi |
| 105 pop esi |
| 106 pop ebx |
| 107 pop ebp |
| 108 ret |
;-----------------------------------------------------------------------
; _gcm_ghash_4bit_x86 - fold a buffer of 16-byte blocks into a 16-byte
; state using only general-purpose registers (no MMX/SSE).
;
; Arguments are read from the stack (32-bit stack-argument convention):
;   arg1 : pointer to the 16-byte state; read on entry, overwritten on exit.
;   arg2 : pointer to a table of 16-byte entries, addressed as
;          table + (nibble * 16).
;   arg3 : pointer to the input data.
;   arg4 : input length in bytes.
; NOTE(review): presumably the length must be a non-zero multiple of 16;
; the outer loop body runs before the bound is checked, so one block is
; always read - confirm the caller guarantees this.
;
; Preserves ebp, ebx, esi, edi; eax, ecx, edx and flags are clobbered.
; The incoming stack slots of arg3 and arg4 are reused as the current
; input pointer and the end pointer.
;-----------------------------------------------------------------------
| 109 global _gcm_ghash_4bit_x86 |
| 110 align 16 |
| 111 _gcm_ghash_4bit_x86: |
| 112 L$_gcm_ghash_4bit_x86_begin: |
| 113 push ebp |
| 114 push ebx |
| 115 push esi |
| 116 push edi |
| 117 sub esp,84 |
; 4 pushes (16) + 84 locals + 4 return address = 104 => arg1..arg4 are at
; [104+esp], [108+esp], [112+esp], [116+esp].
| 118 mov ebx,DWORD [104+esp] |
| 119 mov esi,DWORD [108+esp] |
| 120 mov edi,DWORD [112+esp] |
| 121 mov ecx,DWORD [116+esp] |
; Turn the length slot into an end pointer (input + length).
| 122 add ecx,edi |
| 123 mov DWORD [116+esp],ecx |
; Load the 16-byte state as four dwords.
| 124 mov ebp,DWORD [ebx] |
| 125 mov edx,DWORD [4+ebx] |
| 126 mov ecx,DWORD [8+ebx] |
| 127 mov ebx,DWORD [12+ebx] |
; 16-entry dword reduction table at [esp+16..79]; same values as the second
; dword of each 8-byte entry in L$rem_4bit.
| 128 mov DWORD [16+esp],0 |
| 129 mov DWORD [20+esp],471859200 |
| 130 mov DWORD [24+esp],943718400 |
| 131 mov DWORD [28+esp],610271232 |
| 132 mov DWORD [32+esp],1887436800 |
| 133 mov DWORD [36+esp],1822425088 |
| 134 mov DWORD [40+esp],1220542464 |
| 135 mov DWORD [44+esp],1423966208 |
| 136 mov DWORD [48+esp],3774873600 |
| 137 mov DWORD [52+esp],4246732800 |
| 138 mov DWORD [56+esp],3644850176 |
| 139 mov DWORD [60+esp],3311403008 |
| 140 mov DWORD [64+esp],2441084928 |
| 141 mov DWORD [68+esp],2376073216 |
| 142 mov DWORD [72+esp],2847932416 |
| 143 mov DWORD [76+esp],3051356160 |
| 144 align 16 |
| 145 L$002x86_outer_loop: |
; Per block: state ^= input block (edi points at the current block), then
; keep a byte-addressable copy of the result at [esp+0..15].
| 146 xor ebx,DWORD [12+edi] |
| 147 xor ecx,DWORD [8+edi] |
| 148 xor edx,DWORD [4+edi] |
| 149 xor ebp,DWORD [edi] |
| 150 mov DWORD [12+esp],ebx |
| 151 mov DWORD [8+esp],ecx |
| 152 mov DWORD [4+esp],edx |
| 153 mov DWORD [esp],ebp |
; ebx = (low nibble of byte 15) * 16 = offset of the first table entry.
| 154 shr ebx,20 |
| 155 and ebx,240 |
; Seed the 128-bit accumulator ebp:edx:ecx:ebx from that table entry.
| 156 mov ebp,DWORD [4+ebx*1+esi] |
| 157 mov edx,DWORD [ebx*1+esi] |
| 158 mov ecx,DWORD [12+ebx*1+esi] |
| 159 mov ebx,DWORD [8+ebx*1+esi] |
; eax is zeroed so that later writes to al yield a zero-extended index;
; edi is reused as the byte index (the input pointer lives in [112+esp]).
| 160 xor eax,eax |
| 161 mov edi,15 |
| 162 jmp NEAR L$003x86_loop |
| 163 align 16 |
| 164 L$003x86_loop: |
; Half-step A: shift the accumulator right by 4, fold the shifted-out nibble
; back in through the stack table, then xor in the entry for the HIGH nibble
; of byte [edi] of the saved block.
| 165 mov al,bl |
| 166 shrd ebx,ecx,4 |
| 167 and al,15 |
| 168 shrd ecx,edx,4 |
| 169 shrd edx,ebp,4 |
| 170 shr ebp,4 |
| 171 xor ebp,DWORD [16+eax*4+esp] |
| 172 mov al,BYTE [edi*1+esp] |
| 173 and al,240 |
| 174 xor ebx,DWORD [8+eax*1+esi] |
| 175 xor ecx,DWORD [12+eax*1+esi] |
| 176 xor edx,DWORD [eax*1+esi] |
| 177 xor ebp,DWORD [4+eax*1+esi] |
; Leave the inner loop once byte 0's high nibble has been consumed.
| 178 dec edi |
| 179 js NEAR L$004x86_break |
; Half-step B: same shift/reduce, then the LOW nibble of the new byte [edi].
| 180 mov al,bl |
| 181 shrd ebx,ecx,4 |
| 182 and al,15 |
| 183 shrd ecx,edx,4 |
| 184 shrd edx,ebp,4 |
| 185 shr ebp,4 |
| 186 xor ebp,DWORD [16+eax*4+esp] |
| 187 mov al,BYTE [edi*1+esp] |
| 188 shl al,4 |
| 189 xor ebx,DWORD [8+eax*1+esi] |
| 190 xor ecx,DWORD [12+eax*1+esi] |
| 191 xor edx,DWORD [eax*1+esi] |
| 192 xor ebp,DWORD [4+eax*1+esi] |
| 193 jmp NEAR L$003x86_loop |
| 194 align 16 |
| 195 L$004x86_break: |
; Convert the accumulator back to the in-memory byte order of the state.
| 196 bswap ebx |
| 197 bswap ecx |
| 198 bswap edx |
| 199 bswap ebp |
; Advance the input pointer by one block. lea and mov leave the flags from
; cmp intact, so jb tests (next < end) as an unsigned comparison.
| 200 mov edi,DWORD [112+esp] |
| 201 lea edi,[16+edi] |
| 202 cmp edi,DWORD [116+esp] |
| 203 mov DWORD [112+esp],edi |
| 204 jb NEAR L$002x86_outer_loop |
; All blocks consumed: write the state back through arg1.
| 205 mov edi,DWORD [104+esp] |
| 206 mov DWORD [12+edi],ebx |
| 207 mov DWORD [8+edi],ecx |
| 208 mov DWORD [4+edi],edx |
| 209 mov DWORD [edi],ebp |
| 210 add esp,84 |
| 211 pop edi |
| 212 pop esi |
| 213 pop ebx |
| 214 pop ebp |
| 215 ret |
;-----------------------------------------------------------------------
; _gcm_gmult_4bit_mmx - table-driven multiply of one 16-byte block, with
; the 128-bit accumulator held in MMX registers mm1:mm0.
;
; Arguments are read from the stack (32-bit stack-argument convention):
;   arg1 : pointer to a 16-byte block; read on entry, overwritten on exit.
;   arg2 : pointer to a table of 16-byte entries, addressed as
;          table + (nibble * 16).
; NOTE(review): presumably arg1 is the GHASH state and arg2 the
; precomputed per-key table - confirm against the C prototype.
;
; Preserves ebp, ebx, esi, edi; clobbers eax, ecx, edx, mm0-mm2, flags.
; Executes emms before returning.
;-----------------------------------------------------------------------
| 216 global _gcm_gmult_4bit_mmx |
| 217 align 16 |
| 218 _gcm_gmult_4bit_mmx: |
| 219 L$_gcm_gmult_4bit_mmx_begin: |
| 220 push ebp |
| 221 push ebx |
| 222 push esi |
| 223 push edi |
; 4 pushes (16) + 4 return address = 20 => arg1 at [20+esp], arg2 at [24+esp].
| 224 mov edi,DWORD [20+esp] |
| 225 mov esi,DWORD [24+esp] |
; Position-independent address of L$rem_4bit: call pushes the address of
; L$005pic_point, pop retrieves it, lea adds the link-time distance.
| 226 call L$005pic_point |
| 227 L$005pic_point: |
| 228 pop eax |
| 229 lea eax,[(L$rem_4bit-L$005pic_point)+eax] |
; Split input byte 15: ecx = low nibble * 16, edx = high nibble * 16.
; ebp = index of the next input byte to fetch.
| 230 movzx ebx,BYTE [15+edi] |
| 231 xor ecx,ecx |
| 232 mov edx,ebx |
| 233 mov cl,dl |
| 234 mov ebp,14 |
| 235 shl cl,4 |
| 236 and edx,240 |
; Seed the accumulator from the low-nibble entry: mm0 = qword at +8,
; mm1 = qword at +0. ebx receives the low dword of mm0, whose low 4 bits
; are the next nibble to be shifted out.
| 237 movq mm0,[8+ecx*1+esi] |
| 238 movq mm1,[ecx*1+esi] |
| 239 movd ebx,mm0 |
| 240 jmp NEAR L$006mmx_loop |
| 241 align 16 |
| 242 L$006mmx_loop: |
; Half-step A: shift mm1:mm0 right by 4 (mm2 carries the 4 bits crossing
; from mm1 into mm0), xor the L$rem_4bit entry for the shifted-out nibble
; into mm1, and xor in the table entry at offset edx (high nibble of the
; current byte). The next input byte is fetched into cl meanwhile.
| 243 psrlq mm0,4 |
| 244 and ebx,15 |
| 245 movq mm2,mm1 |
| 246 psrlq mm1,4 |
| 247 pxor mm0,[8+edx*1+esi] |
| 248 mov cl,BYTE [ebp*1+edi] |
| 249 psllq mm2,60 |
| 250 pxor mm1,[ebx*8+eax] |
; The sign flag set by dec is consumed by js below; the intervening
; movd/pxor/mov instructions do not modify EFLAGS.
| 251 dec ebp |
| 252 movd ebx,mm0 |
| 253 pxor mm1,[edx*1+esi] |
| 254 mov edx,ecx |
| 255 pxor mm0,mm2 |
| 256 js NEAR L$007mmx_break |
; Half-step B: ecx = low nibble * 16 and edx = high nibble * 16 of the byte
; just fetched; same shift/reduce, then xor in the low-nibble entry.
| 257 shl cl,4 |
| 258 and ebx,15 |
| 259 psrlq mm0,4 |
| 260 and edx,240 |
| 261 movq mm2,mm1 |
| 262 psrlq mm1,4 |
| 263 pxor mm0,[8+ecx*1+esi] |
| 264 psllq mm2,60 |
| 265 pxor mm1,[ebx*8+eax] |
| 266 movd ebx,mm0 |
| 267 pxor mm1,[ecx*1+esi] |
| 268 pxor mm0,mm2 |
| 269 jmp NEAR L$006mmx_loop |
| 270 align 16 |
| 271 L$007mmx_break: |
; Byte 0 was the last byte fetched: process its low nibble (offset ecx),
; then its high nibble (offset edx), outside the loop.
| 272 shl cl,4 |
| 273 and ebx,15 |
| 274 psrlq mm0,4 |
| 275 and edx,240 |
| 276 movq mm2,mm1 |
| 277 psrlq mm1,4 |
| 278 pxor mm0,[8+ecx*1+esi] |
| 279 psllq mm2,60 |
| 280 pxor mm1,[ebx*8+eax] |
| 281 movd ebx,mm0 |
| 282 pxor mm1,[ecx*1+esi] |
| 283 pxor mm0,mm2 |
| 284 psrlq mm0,4 |
| 285 and ebx,15 |
| 286 movq mm2,mm1 |
| 287 psrlq mm1,4 |
| 288 pxor mm0,[8+edx*1+esi] |
| 289 psllq mm2,60 |
| 290 pxor mm1,[ebx*8+eax] |
; ebx = low dword of mm0. The following pxor with mm2 only touches the top
; 4 bits of mm0, so this dword is already final.
| 291 movd ebx,mm0 |
| 292 pxor mm1,[edx*1+esi] |
| 293 pxor mm0,mm2 |
; Unpack: ebx = mm0 low, ecx = mm0 high, edx = mm1 low, ebp = mm1 high.
| 294 psrlq mm0,32 |
| 295 movd edx,mm1 |
| 296 psrlq mm1,32 |
| 297 movd ecx,mm0 |
| 298 movd ebp,mm1 |
| 299 bswap ebx |
| 300 bswap edx |
| 301 bswap ecx |
| 302 bswap ebp |
; Leave MMX state before returning to the caller.
| 303 emms |
; Write the byte-swapped result back over the block at arg1 (edi).
| 304 mov DWORD [12+edi],ebx |
| 305 mov DWORD [4+edi],edx |
| 306 mov DWORD [8+edi],ecx |
| 307 mov DWORD [edi],ebp |
| 308 pop edi |
| 309 pop esi |
| 310 pop ebx |
| 311 pop ebp |
| 312 ret |
;-----------------------------------------------------------------------
; _gcm_ghash_4bit_mmx - fold a buffer of 16-byte blocks into a 16-byte
; state, consuming one input byte (two nibbles) per step with the help of
; the L$rem_8bit table and MMX registers.
;
; Arguments are read from the stack (32-bit stack-argument convention):
;   arg1 : pointer to the 16-byte state; read on entry, overwritten on exit.
;   arg2 : pointer to a table of 16 entries of 16 bytes each.
;   arg3 : pointer to the input data.
;   arg4 : input length in bytes.
; NOTE(review): the outer loop is do-while and exits only when the input
; pointer EQUALS the end pointer (jne), so the length presumably must be
; a non-zero multiple of 16 - confirm the caller guarantees this.
; NOTE(review): pinsrw/pshufw on MMX registers are not part of the
; original MMX instruction set; callers presumably gate this routine on
; the relevant CPU feature bit - confirm.
;
; Preserves ebp, ebx, esi, edi; clobbers eax, ecx, edx, mm0-mm7, flags.
; Executes emms before returning.
;
; Stack frame after setup (esp is 64-byte aligned minus 16):
;   [esp+0   .. +15 ] : per entry i, (low byte of qword at +8) << 4
;   [esp+16  .. +143] : per entry i, qword at offset +8 of table entry i
;   [esp+144 .. +271] : per entry i, qword at offset +0 of table entry i
;   [esp+272 .. +399] : per entry i, low  qword of (entry i >> 4)
;   [esp+400 .. +527] : per entry i, high qword of (entry i >> 4)
;   [esp+528 .. +539] : first 12 bytes of (state ^ input block)
;   [esp+544]         : arg1 (state pointer)
;   [esp+548]         : pointer to the next input block
;   [esp+552]         : end pointer (input + length)
;   [esp+556]         : caller-frame esp to restore on exit
;-----------------------------------------------------------------------
| 313 global _gcm_ghash_4bit_mmx |
| 314 align 16 |
| 315 _gcm_ghash_4bit_mmx: |
| 316 L$_gcm_ghash_4bit_mmx_begin: |
| 317 push ebp |
| 318 push ebx |
| 319 push esi |
| 320 push edi |
; 4 pushes (16) + 4 return address = 20 => arg1..arg4 at [20..32+esp].
| 321 mov eax,DWORD [20+esp] |
| 322 mov ebx,DWORD [24+esp] |
| 323 mov ecx,DWORD [28+esp] |
| 324 mov edx,DWORD [32+esp] |
; Remember esp before the frame is realigned.
| 325 mov ebp,esp |
; Position-independent address of L$rem_8bit in esi.
| 326 call L$008pic_point |
| 327 L$008pic_point: |
| 328 pop esi |
| 329 lea esi,[(L$rem_8bit-L$008pic_point)+esi] |
; Carve out the aligned frame described in the header.
| 330 sub esp,544 |
| 331 and esp,-64 |
| 332 sub esp,16 |
; edx = end pointer (length + input pointer).
| 333 add edx,ecx |
| 334 mov DWORD [544+esp],eax |
| 335 mov DWORD [552+esp],edx |
| 336 mov DWORD [556+esp],ebp |
; Bias the table pointer by +128 so the 256-byte table is addressed with
; displacements -128..+120.
; NOTE(review): presumably chosen so every displacement fits a signed byte.
| 337 add ebx,128 |
; edi -> esp+144 and ebp -> esp+400; with -128 displacements they also
; reach the regions at esp+16 and esp+272.
| 338 lea edi,[144+esp] |
| 339 lea ebp,[400+esp] |
; Unrolled, software-pipelined copy of the 16 table entries into the frame.
; For entry i (q0 = qword at +0, q8 = qword at +8):
;   [esp+i]        = low byte of (low dword of q8 << 4)
;   [edi-128+8*i]  = q8                  [edi+8*i] = q0
;   [ebp-128+8*i]  = (q8 >> 4) | (q0 << 60)
;   [ebp+8*i]      = q0 >> 4
| 340 mov edx,DWORD [ebx-120] |
| 341 movq mm0,[ebx-120] |
| 342 movq mm3,[ebx-128] |
| 343 shl edx,4 |
| 344 mov BYTE [esp],dl |
| 345 mov edx,DWORD [ebx-104] |
| 346 movq mm2,[ebx-104] |
| 347 movq mm5,[ebx-112] |
| 348 movq [edi-128],mm0 |
| 349 psrlq mm0,4 |
| 350 movq [edi],mm3 |
| 351 movq mm7,mm3 |
| 352 psrlq mm3,4 |
| 353 shl edx,4 |
| 354 mov BYTE [1+esp],dl |
| 355 mov edx,DWORD [ebx-88] |
| 356 movq mm1,[ebx-88] |
| 357 psllq mm7,60 |
| 358 movq mm4,[ebx-96] |
| 359 por mm0,mm7 |
| 360 movq [edi-120],mm2 |
| 361 psrlq mm2,4 |
| 362 movq [8+edi],mm5 |
| 363 movq mm6,mm5 |
| 364 movq [ebp-128],mm0 |
| 365 psrlq mm5,4 |
| 366 movq [ebp],mm3 |
| 367 shl edx,4 |
| 368 mov BYTE [2+esp],dl |
| 369 mov edx,DWORD [ebx-72] |
| 370 movq mm0,[ebx-72] |
| 371 psllq mm6,60 |
| 372 movq mm3,[ebx-80] |
| 373 por mm2,mm6 |
| 374 movq [edi-112],mm1 |
| 375 psrlq mm1,4 |
| 376 movq [16+edi],mm4 |
| 377 movq mm7,mm4 |
| 378 movq [ebp-120],mm2 |
| 379 psrlq mm4,4 |
| 380 movq [8+ebp],mm5 |
| 381 shl edx,4 |
| 382 mov BYTE [3+esp],dl |
| 383 mov edx,DWORD [ebx-56] |
| 384 movq mm2,[ebx-56] |
| 385 psllq mm7,60 |
| 386 movq mm5,[ebx-64] |
| 387 por mm1,mm7 |
| 388 movq [edi-104],mm0 |
| 389 psrlq mm0,4 |
| 390 movq [24+edi],mm3 |
| 391 movq mm6,mm3 |
| 392 movq [ebp-112],mm1 |
| 393 psrlq mm3,4 |
| 394 movq [16+ebp],mm4 |
| 395 shl edx,4 |
| 396 mov BYTE [4+esp],dl |
| 397 mov edx,DWORD [ebx-40] |
| 398 movq mm1,[ebx-40] |
| 399 psllq mm6,60 |
| 400 movq mm4,[ebx-48] |
| 401 por mm0,mm6 |
| 402 movq [edi-96],mm2 |
| 403 psrlq mm2,4 |
| 404 movq [32+edi],mm5 |
| 405 movq mm7,mm5 |
| 406 movq [ebp-104],mm0 |
| 407 psrlq mm5,4 |
| 408 movq [24+ebp],mm3 |
| 409 shl edx,4 |
| 410 mov BYTE [5+esp],dl |
| 411 mov edx,DWORD [ebx-24] |
| 412 movq mm0,[ebx-24] |
| 413 psllq mm7,60 |
| 414 movq mm3,[ebx-32] |
| 415 por mm2,mm7 |
| 416 movq [edi-88],mm1 |
| 417 psrlq mm1,4 |
| 418 movq [40+edi],mm4 |
| 419 movq mm6,mm4 |
| 420 movq [ebp-96],mm2 |
| 421 psrlq mm4,4 |
| 422 movq [32+ebp],mm5 |
| 423 shl edx,4 |
| 424 mov BYTE [6+esp],dl |
| 425 mov edx,DWORD [ebx-8] |
| 426 movq mm2,[ebx-8] |
| 427 psllq mm6,60 |
| 428 movq mm5,[ebx-16] |
| 429 por mm1,mm6 |
| 430 movq [edi-80],mm0 |
| 431 psrlq mm0,4 |
| 432 movq [48+edi],mm3 |
| 433 movq mm7,mm3 |
| 434 movq [ebp-88],mm1 |
| 435 psrlq mm3,4 |
| 436 movq [40+ebp],mm4 |
| 437 shl edx,4 |
| 438 mov BYTE [7+esp],dl |
| 439 mov edx,DWORD [8+ebx] |
| 440 movq mm1,[8+ebx] |
| 441 psllq mm7,60 |
| 442 movq mm4,[ebx] |
| 443 por mm0,mm7 |
| 444 movq [edi-72],mm2 |
| 445 psrlq mm2,4 |
| 446 movq [56+edi],mm5 |
| 447 movq mm6,mm5 |
| 448 movq [ebp-80],mm0 |
| 449 psrlq mm5,4 |
| 450 movq [48+ebp],mm3 |
| 451 shl edx,4 |
| 452 mov BYTE [8+esp],dl |
| 453 mov edx,DWORD [24+ebx] |
| 454 movq mm0,[24+ebx] |
| 455 psllq mm6,60 |
| 456 movq mm3,[16+ebx] |
| 457 por mm2,mm6 |
| 458 movq [edi-64],mm1 |
| 459 psrlq mm1,4 |
| 460 movq [64+edi],mm4 |
| 461 movq mm7,mm4 |
| 462 movq [ebp-72],mm2 |
| 463 psrlq mm4,4 |
| 464 movq [56+ebp],mm5 |
| 465 shl edx,4 |
| 466 mov BYTE [9+esp],dl |
| 467 mov edx,DWORD [40+ebx] |
| 468 movq mm2,[40+ebx] |
| 469 psllq mm7,60 |
| 470 movq mm5,[32+ebx] |
| 471 por mm1,mm7 |
| 472 movq [edi-56],mm0 |
| 473 psrlq mm0,4 |
| 474 movq [72+edi],mm3 |
| 475 movq mm6,mm3 |
| 476 movq [ebp-64],mm1 |
| 477 psrlq mm3,4 |
| 478 movq [64+ebp],mm4 |
| 479 shl edx,4 |
| 480 mov BYTE [10+esp],dl |
| 481 mov edx,DWORD [56+ebx] |
| 482 movq mm1,[56+ebx] |
| 483 psllq mm6,60 |
| 484 movq mm4,[48+ebx] |
| 485 por mm0,mm6 |
| 486 movq [edi-48],mm2 |
| 487 psrlq mm2,4 |
| 488 movq [80+edi],mm5 |
| 489 movq mm7,mm5 |
| 490 movq [ebp-56],mm0 |
| 491 psrlq mm5,4 |
| 492 movq [72+ebp],mm3 |
| 493 shl edx,4 |
| 494 mov BYTE [11+esp],dl |
| 495 mov edx,DWORD [72+ebx] |
| 496 movq mm0,[72+ebx] |
| 497 psllq mm7,60 |
| 498 movq mm3,[64+ebx] |
| 499 por mm2,mm7 |
| 500 movq [edi-40],mm1 |
| 501 psrlq mm1,4 |
| 502 movq [88+edi],mm4 |
| 503 movq mm6,mm4 |
| 504 movq [ebp-48],mm2 |
| 505 psrlq mm4,4 |
| 506 movq [80+ebp],mm5 |
| 507 shl edx,4 |
| 508 mov BYTE [12+esp],dl |
| 509 mov edx,DWORD [88+ebx] |
| 510 movq mm2,[88+ebx] |
| 511 psllq mm6,60 |
| 512 movq mm5,[80+ebx] |
| 513 por mm1,mm6 |
| 514 movq [edi-32],mm0 |
| 515 psrlq mm0,4 |
| 516 movq [96+edi],mm3 |
| 517 movq mm7,mm3 |
| 518 movq [ebp-40],mm1 |
| 519 psrlq mm3,4 |
| 520 movq [88+ebp],mm4 |
| 521 shl edx,4 |
| 522 mov BYTE [13+esp],dl |
| 523 mov edx,DWORD [104+ebx] |
| 524 movq mm1,[104+ebx] |
| 525 psllq mm7,60 |
| 526 movq mm4,[96+ebx] |
| 527 por mm0,mm7 |
| 528 movq [edi-24],mm2 |
| 529 psrlq mm2,4 |
| 530 movq [104+edi],mm5 |
| 531 movq mm6,mm5 |
| 532 movq [ebp-32],mm0 |
| 533 psrlq mm5,4 |
| 534 movq [96+ebp],mm3 |
| 535 shl edx,4 |
| 536 mov BYTE [14+esp],dl |
| 537 mov edx,DWORD [120+ebx] |
| 538 movq mm0,[120+ebx] |
| 539 psllq mm6,60 |
| 540 movq mm3,[112+ebx] |
| 541 por mm2,mm6 |
| 542 movq [edi-16],mm1 |
| 543 psrlq mm1,4 |
| 544 movq [112+edi],mm4 |
| 545 movq mm7,mm4 |
| 546 movq [ebp-24],mm2 |
| 547 psrlq mm4,4 |
| 548 movq [104+ebp],mm5 |
| 549 shl edx,4 |
| 550 mov BYTE [15+esp],dl |
| 551 psllq mm7,60 |
| 552 por mm1,mm7 |
| 553 movq [edi-8],mm0 |
| 554 psrlq mm0,4 |
| 555 movq [120+edi],mm3 |
| 556 movq mm6,mm3 |
| 557 movq [ebp-16],mm1 |
| 558 psrlq mm3,4 |
| 559 movq [112+ebp],mm4 |
| 560 psllq mm6,60 |
| 561 por mm0,mm6 |
| 562 movq [ebp-8],mm0 |
| 563 movq [120+ebp],mm3 |
; Table copy done. Load the state: mm6 = bytes 0..7, ebx = bytes 8..11,
; edx = bytes 12..15.
| 564 movq mm6,[eax] |
| 565 mov ebx,DWORD [8+eax] |
| 566 mov edx,DWORD [12+eax] |
| 567 align 16 |
| 568 L$009outer: |
; Per block: state ^= input block at ecx; advance ecx by 16 and park it in
; the frame. The first 12 bytes of the xor result go to [esp+528..539];
; the last dword stays in edx.
| 569 xor edx,DWORD [12+ecx] |
| 570 xor ebx,DWORD [8+ecx] |
| 571 pxor mm6,[ecx] |
| 572 lea ecx,[16+ecx] |
| 573 mov DWORD [536+esp],ebx |
| 574 movq [528+esp],mm6 |
| 575 mov DWORD [548+esp],ecx |
; Byte-at-a-time pass. Each "rol edx,8 / mov al,dl" pulls the next byte of
; the current dword; its low nibble (and al,15) indexes the unshifted
; tables at esp+16/esp+144 and its high nibble (shr ..,4) indexes the
; pre-shifted tables at esp+272/esp+400. The accumulator mm6:mm7 is shifted
; right by 8 per step; the byte shifted out (combined with the nibble byte
; at [esp+idx]) selects a 16-bit L$rem_8bit entry that is inserted via
; pinsrw into mm0/mm1/mm2 (used in rotation) and xored into mm6 one step
; later.
| 576 xor eax,eax |
| 577 rol edx,8 |
| 578 mov al,dl |
| 579 mov ebp,eax |
| 580 and al,15 |
| 581 shr ebp,4 |
| 582 pxor mm0,mm0 |
| 583 rol edx,8 |
| 584 pxor mm1,mm1 |
| 585 pxor mm2,mm2 |
| 586 movq mm7,[16+eax*8+esp] |
| 587 movq mm6,[144+eax*8+esp] |
| 588 mov al,dl |
| 589 movd ebx,mm7 |
| 590 psrlq mm7,8 |
| 591 movq mm3,mm6 |
| 592 mov edi,eax |
| 593 psrlq mm6,8 |
| 594 pxor mm7,[272+ebp*8+esp] |
| 595 and al,15 |
| 596 psllq mm3,56 |
| 597 shr edi,4 |
| 598 pxor mm7,[16+eax*8+esp] |
| 599 rol edx,8 |
| 600 pxor mm6,[144+eax*8+esp] |
| 601 pxor mm7,mm3 |
| 602 pxor mm6,[400+ebp*8+esp] |
| 603 xor bl,BYTE [ebp*1+esp] |
| 604 mov al,dl |
| 605 movd ecx,mm7 |
| 606 movzx ebx,bl |
| 607 psrlq mm7,8 |
| 608 movq mm3,mm6 |
| 609 mov ebp,eax |
| 610 psrlq mm6,8 |
| 611 pxor mm7,[272+edi*8+esp] |
| 612 and al,15 |
| 613 psllq mm3,56 |
| 614 shr ebp,4 |
| 615 pinsrw mm2,WORD [ebx*2+esi],2 |
| 616 pxor mm7,[16+eax*8+esp] |
| 617 rol edx,8 |
| 618 pxor mm6,[144+eax*8+esp] |
| 619 pxor mm7,mm3 |
| 620 pxor mm6,[400+edi*8+esp] |
| 621 xor cl,BYTE [edi*1+esp] |
| 622 mov al,dl |
; Dword 3 exhausted: continue with bytes 8..11 saved at [esp+536].
| 623 mov edx,DWORD [536+esp] |
| 624 movd ebx,mm7 |
| 625 movzx ecx,cl |
| 626 psrlq mm7,8 |
| 627 movq mm3,mm6 |
| 628 mov edi,eax |
| 629 psrlq mm6,8 |
| 630 pxor mm7,[272+ebp*8+esp] |
| 631 and al,15 |
| 632 psllq mm3,56 |
| 633 pxor mm6,mm2 |
| 634 shr edi,4 |
| 635 pinsrw mm1,WORD [ecx*2+esi],2 |
| 636 pxor mm7,[16+eax*8+esp] |
| 637 rol edx,8 |
| 638 pxor mm6,[144+eax*8+esp] |
| 639 pxor mm7,mm3 |
| 640 pxor mm6,[400+ebp*8+esp] |
| 641 xor bl,BYTE [ebp*1+esp] |
| 642 mov al,dl |
| 643 movd ecx,mm7 |
| 644 movzx ebx,bl |
| 645 psrlq mm7,8 |
| 646 movq mm3,mm6 |
| 647 mov ebp,eax |
| 648 psrlq mm6,8 |
| 649 pxor mm7,[272+edi*8+esp] |
| 650 and al,15 |
| 651 psllq mm3,56 |
| 652 pxor mm6,mm1 |
| 653 shr ebp,4 |
| 654 pinsrw mm0,WORD [ebx*2+esi],2 |
| 655 pxor mm7,[16+eax*8+esp] |
| 656 rol edx,8 |
| 657 pxor mm6,[144+eax*8+esp] |
| 658 pxor mm7,mm3 |
| 659 pxor mm6,[400+edi*8+esp] |
| 660 xor cl,BYTE [edi*1+esp] |
| 661 mov al,dl |
| 662 movd ebx,mm7 |
| 663 movzx ecx,cl |
| 664 psrlq mm7,8 |
| 665 movq mm3,mm6 |
| 666 mov edi,eax |
| 667 psrlq mm6,8 |
| 668 pxor mm7,[272+ebp*8+esp] |
| 669 and al,15 |
| 670 psllq mm3,56 |
| 671 pxor mm6,mm0 |
| 672 shr edi,4 |
| 673 pinsrw mm2,WORD [ecx*2+esi],2 |
| 674 pxor mm7,[16+eax*8+esp] |
| 675 rol edx,8 |
| 676 pxor mm6,[144+eax*8+esp] |
| 677 pxor mm7,mm3 |
| 678 pxor mm6,[400+ebp*8+esp] |
| 679 xor bl,BYTE [ebp*1+esp] |
| 680 mov al,dl |
| 681 movd ecx,mm7 |
| 682 movzx ebx,bl |
| 683 psrlq mm7,8 |
| 684 movq mm3,mm6 |
| 685 mov ebp,eax |
| 686 psrlq mm6,8 |
| 687 pxor mm7,[272+edi*8+esp] |
| 688 and al,15 |
| 689 psllq mm3,56 |
| 690 pxor mm6,mm2 |
| 691 shr ebp,4 |
| 692 pinsrw mm1,WORD [ebx*2+esi],2 |
| 693 pxor mm7,[16+eax*8+esp] |
| 694 rol edx,8 |
| 695 pxor mm6,[144+eax*8+esp] |
| 696 pxor mm7,mm3 |
| 697 pxor mm6,[400+edi*8+esp] |
| 698 xor cl,BYTE [edi*1+esp] |
| 699 mov al,dl |
; Continue with bytes 4..7 saved at [esp+532].
| 700 mov edx,DWORD [532+esp] |
| 701 movd ebx,mm7 |
| 702 movzx ecx,cl |
| 703 psrlq mm7,8 |
| 704 movq mm3,mm6 |
| 705 mov edi,eax |
| 706 psrlq mm6,8 |
| 707 pxor mm7,[272+ebp*8+esp] |
| 708 and al,15 |
| 709 psllq mm3,56 |
| 710 pxor mm6,mm1 |
| 711 shr edi,4 |
| 712 pinsrw mm0,WORD [ecx*2+esi],2 |
| 713 pxor mm7,[16+eax*8+esp] |
| 714 rol edx,8 |
| 715 pxor mm6,[144+eax*8+esp] |
| 716 pxor mm7,mm3 |
| 717 pxor mm6,[400+ebp*8+esp] |
| 718 xor bl,BYTE [ebp*1+esp] |
| 719 mov al,dl |
| 720 movd ecx,mm7 |
| 721 movzx ebx,bl |
| 722 psrlq mm7,8 |
| 723 movq mm3,mm6 |
| 724 mov ebp,eax |
| 725 psrlq mm6,8 |
| 726 pxor mm7,[272+edi*8+esp] |
| 727 and al,15 |
| 728 psllq mm3,56 |
| 729 pxor mm6,mm0 |
| 730 shr ebp,4 |
| 731 pinsrw mm2,WORD [ebx*2+esi],2 |
| 732 pxor mm7,[16+eax*8+esp] |
| 733 rol edx,8 |
| 734 pxor mm6,[144+eax*8+esp] |
| 735 pxor mm7,mm3 |
| 736 pxor mm6,[400+edi*8+esp] |
| 737 xor cl,BYTE [edi*1+esp] |
| 738 mov al,dl |
| 739 movd ebx,mm7 |
| 740 movzx ecx,cl |
| 741 psrlq mm7,8 |
| 742 movq mm3,mm6 |
| 743 mov edi,eax |
| 744 psrlq mm6,8 |
| 745 pxor mm7,[272+ebp*8+esp] |
| 746 and al,15 |
| 747 psllq mm3,56 |
| 748 pxor mm6,mm2 |
| 749 shr edi,4 |
| 750 pinsrw mm1,WORD [ecx*2+esi],2 |
| 751 pxor mm7,[16+eax*8+esp] |
| 752 rol edx,8 |
| 753 pxor mm6,[144+eax*8+esp] |
| 754 pxor mm7,mm3 |
| 755 pxor mm6,[400+ebp*8+esp] |
| 756 xor bl,BYTE [ebp*1+esp] |
| 757 mov al,dl |
| 758 movd ecx,mm7 |
| 759 movzx ebx,bl |
| 760 psrlq mm7,8 |
| 761 movq mm3,mm6 |
| 762 mov ebp,eax |
| 763 psrlq mm6,8 |
| 764 pxor mm7,[272+edi*8+esp] |
| 765 and al,15 |
| 766 psllq mm3,56 |
| 767 pxor mm6,mm1 |
| 768 shr ebp,4 |
| 769 pinsrw mm0,WORD [ebx*2+esi],2 |
| 770 pxor mm7,[16+eax*8+esp] |
| 771 rol edx,8 |
| 772 pxor mm6,[144+eax*8+esp] |
| 773 pxor mm7,mm3 |
| 774 pxor mm6,[400+edi*8+esp] |
| 775 xor cl,BYTE [edi*1+esp] |
| 776 mov al,dl |
; Continue with bytes 0..3 saved at [esp+528].
| 777 mov edx,DWORD [528+esp] |
| 778 movd ebx,mm7 |
| 779 movzx ecx,cl |
| 780 psrlq mm7,8 |
| 781 movq mm3,mm6 |
| 782 mov edi,eax |
| 783 psrlq mm6,8 |
| 784 pxor mm7,[272+ebp*8+esp] |
| 785 and al,15 |
| 786 psllq mm3,56 |
| 787 pxor mm6,mm0 |
| 788 shr edi,4 |
| 789 pinsrw mm2,WORD [ecx*2+esi],2 |
| 790 pxor mm7,[16+eax*8+esp] |
| 791 rol edx,8 |
| 792 pxor mm6,[144+eax*8+esp] |
| 793 pxor mm7,mm3 |
| 794 pxor mm6,[400+ebp*8+esp] |
| 795 xor bl,BYTE [ebp*1+esp] |
| 796 mov al,dl |
| 797 movd ecx,mm7 |
| 798 movzx ebx,bl |
| 799 psrlq mm7,8 |
| 800 movq mm3,mm6 |
| 801 mov ebp,eax |
| 802 psrlq mm6,8 |
| 803 pxor mm7,[272+edi*8+esp] |
| 804 and al,15 |
| 805 psllq mm3,56 |
| 806 pxor mm6,mm2 |
| 807 shr ebp,4 |
| 808 pinsrw mm1,WORD [ebx*2+esi],2 |
| 809 pxor mm7,[16+eax*8+esp] |
| 810 rol edx,8 |
| 811 pxor mm6,[144+eax*8+esp] |
| 812 pxor mm7,mm3 |
| 813 pxor mm6,[400+edi*8+esp] |
| 814 xor cl,BYTE [edi*1+esp] |
| 815 mov al,dl |
| 816 movd ebx,mm7 |
| 817 movzx ecx,cl |
| 818 psrlq mm7,8 |
| 819 movq mm3,mm6 |
| 820 mov edi,eax |
| 821 psrlq mm6,8 |
| 822 pxor mm7,[272+ebp*8+esp] |
| 823 and al,15 |
| 824 psllq mm3,56 |
| 825 pxor mm6,mm1 |
| 826 shr edi,4 |
| 827 pinsrw mm0,WORD [ecx*2+esi],2 |
| 828 pxor mm7,[16+eax*8+esp] |
| 829 rol edx,8 |
| 830 pxor mm6,[144+eax*8+esp] |
| 831 pxor mm7,mm3 |
| 832 pxor mm6,[400+ebp*8+esp] |
| 833 xor bl,BYTE [ebp*1+esp] |
| 834 mov al,dl |
| 835 movd ecx,mm7 |
| 836 movzx ebx,bl |
| 837 psrlq mm7,8 |
| 838 movq mm3,mm6 |
| 839 mov ebp,eax |
| 840 psrlq mm6,8 |
| 841 pxor mm7,[272+edi*8+esp] |
| 842 and al,15 |
| 843 psllq mm3,56 |
| 844 pxor mm6,mm0 |
| 845 shr ebp,4 |
| 846 pinsrw mm2,WORD [ebx*2+esi],2 |
| 847 pxor mm7,[16+eax*8+esp] |
| 848 rol edx,8 |
| 849 pxor mm6,[144+eax*8+esp] |
| 850 pxor mm7,mm3 |
| 851 pxor mm6,[400+edi*8+esp] |
| 852 xor cl,BYTE [edi*1+esp] |
| 853 mov al,dl |
; NOTE(review): this load appears to be dead - edx is not read again before
; "movd edx,mm7" overwrites it below. [esp+524] lies inside the esp+400
; table region, so the read itself stays within the frame.
| 854 mov edx,DWORD [524+esp] |
| 855 movd ebx,mm7 |
| 856 movzx ecx,cl |
| 857 psrlq mm7,8 |
| 858 movq mm3,mm6 |
| 859 mov edi,eax |
| 860 psrlq mm6,8 |
| 861 pxor mm7,[272+ebp*8+esp] |
| 862 and al,15 |
| 863 psllq mm3,56 |
| 864 pxor mm6,mm2 |
| 865 shr edi,4 |
| 866 pinsrw mm1,WORD [ecx*2+esi],2 |
| 867 pxor mm7,[16+eax*8+esp] |
| 868 pxor mm6,[144+eax*8+esp] |
| 869 xor bl,BYTE [ebp*1+esp] |
| 870 pxor mm7,mm3 |
| 871 pxor mm6,[400+ebp*8+esp] |
| 872 movzx ebx,bl |
; Final nibble of the block: shift by 4 instead of 8 and use the unshifted
; tables with the high-nibble index in edi. The outstanding L$rem_8bit
; corrections in mm1/mm0/mm2 are aligned with shifts of 4 / 12 bits and a
; pinsrw into word 3 before being folded into mm6.
| 873 pxor mm2,mm2 |
| 874 psllq mm1,4 |
| 875 movd ecx,mm7 |
| 876 psrlq mm7,4 |
| 877 movq mm3,mm6 |
| 878 psrlq mm6,4 |
| 879 shl ecx,4 |
| 880 pxor mm7,[16+edi*8+esp] |
| 881 psllq mm3,60 |
| 882 movzx ecx,cl |
| 883 pxor mm7,mm3 |
| 884 pxor mm6,[144+edi*8+esp] |
| 885 pinsrw mm0,WORD [ebx*2+esi],2 |
| 886 pxor mm6,mm1 |
| 887 movd edx,mm7 |
| 888 pinsrw mm2,WORD [ecx*2+esi],3 |
| 889 psllq mm0,12 |
| 890 pxor mm6,mm0 |
| 891 psrlq mm7,32 |
| 892 pxor mm6,mm2 |
; Reload the next-block pointer and convert the result to state byte order:
; edx/ebx via bswap; mm6 via a byte swap inside each word (psllw/psrlw/por)
; followed by a word-order reversal (pshufw 27), i.e. a full 8-byte reversal.
| 893 mov ecx,DWORD [548+esp] |
| 894 movd ebx,mm7 |
| 895 movq mm3,mm6 |
| 896 psllw mm6,8 |
| 897 psrlw mm3,8 |
| 898 por mm6,mm3 |
| 899 bswap edx |
| 900 pshufw mm6,mm6,27 |
| 901 bswap ebx |
; Loop until the input pointer equals the end pointer.
| 902 cmp ecx,DWORD [552+esp] |
| 903 jne NEAR L$009outer |
; Store the state back through arg1 and restore the caller's stack pointer.
| 904 mov eax,DWORD [544+esp] |
| 905 mov DWORD [12+eax],edx |
| 906 mov DWORD [8+eax],ebx |
| 907 movq [eax],mm6 |
| 908 mov esp,DWORD [556+esp] |
| 909 emms |
| 910 pop edi |
| 911 pop esi |
| 912 pop ebx |
| 913 pop ebp |
| 914 ret |
;-----------------------------------------------------------------------
; _gcm_init_clmul - build the 48-byte key table consumed by the
; PCLMULQDQ-based routines in this file.
;
; Arguments are read from the stack (no registers are pushed):
;   arg1 [4+esp] : output pointer; 48 bytes are written (offsets 0, 16, 32).
;   arg2 [8+esp] : pointer to the 16-byte input value.
; NOTE(review): presumably arg2 is the GHASH key H and the outputs are
; H' (adjusted H), H'^2 and their Karatsuba half-sums - confirm.
;
; Clobbers eax, ecx, edx, xmm0-xmm5, flags. SSE instructions that the
; generator emits as raw bytes are decoded in the comments below.
;-----------------------------------------------------------------------
| 915 global _gcm_init_clmul |
| 916 align 16 |
| 917 _gcm_init_clmul: |
| 918 L$_gcm_init_clmul_begin: |
| 919 mov edx,DWORD [4+esp] |
| 920 mov eax,DWORD [8+esp] |
; Position-independent address of L$bswap in ecx.
| 921 call L$010pic |
| 922 L$010pic: |
| 923 pop ecx |
| 924 lea ecx,[(L$bswap-L$010pic)+ecx] |
; Load the input and swap its 64-bit halves (pshufd 78 = dwords 2,3,0,1).
| 925 movdqu xmm2,[eax] |
| 926 pshufd xmm2,xmm2,78 |
; xmm4 = top dword broadcast; used below to test the most significant bit.
| 927 pshufd xmm4,xmm2,255 |
; 128-bit left shift by 1: psllq shifts each half, psrlq 63 + pslldq 8
; carry bit 63 of the low half into bit 0 of the high half.
| 928 movdqa xmm3,xmm2 |
| 929 psllq xmm2,1 |
| 930 pxor xmm5,xmm5 |
| 931 psrlq xmm3,63 |
; xmm5 = all-ones if the original top bit was set (0 > signed top dword).
| 932 pcmpgtd xmm5,xmm4 |
| 933 pslldq xmm3,8 |
| 934 por xmm2,xmm3 |
; Conditionally xor in the constant stored at L$bswap+16.
| 935 pand xmm5,[16+ecx] |
| 936 pxor xmm2,xmm5 |
; Square xmm2 with a 3-multiplication Karatsuba carry-less multiply:
; xmm0/xmm1 = copies of the value, xmm3/xmm4 = (high half ^ low half).
| 937 movdqa xmm0,xmm2 |
| 938 movdqa xmm1,xmm0 |
| 939 pshufd xmm3,xmm0,78 |
| 940 pshufd xmm4,xmm2,78 |
| 941 pxor xmm3,xmm0 |
| 942 pxor xmm4,xmm2 |
; pclmulqdq xmm0,xmm2,0x00   (low  * low)
| 943 db 102,15,58,68,194,0 |
; pclmulqdq xmm1,xmm2,0x11   (high * high)
| 944 db 102,15,58,68,202,17 |
; pclmulqdq xmm3,xmm4,0x00   (middle term)
| 945 db 102,15,58,68,220,0 |
; Karatsuba fix-up: middle ^= low ^ high, then split it across xmm1:xmm0.
| 946 xorps xmm3,xmm0 |
| 947 xorps xmm3,xmm1 |
| 948 movdqa xmm4,xmm3 |
| 949 psrldq xmm3,8 |
| 950 pslldq xmm4,8 |
| 951 pxor xmm1,xmm3 |
| 952 pxor xmm0,xmm4 |
; Reduce the 256-bit product xmm1:xmm0 to 128 bits in xmm0.
; NOTE(review): presumably reduction modulo the GHASH polynomial, done in
; two phases (left shifts 5/1/57, then right shifts 1/5/1) - confirm.
| 953 movdqa xmm4,xmm0 |
| 954 movdqa xmm3,xmm0 |
| 955 psllq xmm0,5 |
| 956 pxor xmm3,xmm0 |
| 957 psllq xmm0,1 |
| 958 pxor xmm0,xmm3 |
| 959 psllq xmm0,57 |
| 960 movdqa xmm3,xmm0 |
| 961 pslldq xmm0,8 |
| 962 psrldq xmm3,8 |
| 963 pxor xmm0,xmm4 |
| 964 pxor xmm1,xmm3 |
| 965 movdqa xmm4,xmm0 |
| 966 psrlq xmm0,1 |
| 967 pxor xmm1,xmm4 |
| 968 pxor xmm4,xmm0 |
| 969 psrlq xmm0,5 |
| 970 pxor xmm0,xmm4 |
| 971 psrlq xmm0,1 |
| 972 pxor xmm0,xmm1 |
; Output layout: [edx+0] = xmm2 (adjusted input), [edx+16] = xmm0 (its
; square), [edx+32] = the (high ^ low) half-sums of both, packed by palignr.
| 973 pshufd xmm3,xmm2,78 |
| 974 pshufd xmm4,xmm0,78 |
| 975 pxor xmm3,xmm2 |
| 976 movdqu [edx],xmm2 |
| 977 pxor xmm4,xmm0 |
| 978 movdqu [16+edx],xmm0 |
; palignr xmm4,xmm3,8
| 979 db 102,15,58,15,227,8 |
| 980 movdqu [32+edx],xmm4 |
| 981 ret |
;-----------------------------------------------------------------------
; _gcm_gmult_clmul - multiply one 16-byte block by the key using
; PCLMULQDQ.
;
; Arguments are read from the stack (no registers are pushed):
;   arg1 [4+esp] : pointer to a 16-byte block; read and overwritten.
;   arg2 [8+esp] : pointer to a key table; offsets 0 and 32 are read.
; NOTE(review): presumably arg2 is the table written by _gcm_init_clmul -
; confirm against the caller.
;
; Clobbers eax, ecx, edx, xmm0-xmm5, flags.
;-----------------------------------------------------------------------
| 982 global _gcm_gmult_clmul |
| 983 align 16 |
| 984 _gcm_gmult_clmul: |
| 985 L$_gcm_gmult_clmul_begin: |
| 986 mov eax,DWORD [4+esp] |
| 987 mov edx,DWORD [8+esp] |
; Position-independent address of L$bswap in ecx.
| 988 call L$011pic |
| 989 L$011pic: |
| 990 pop ecx |
| 991 lea ecx,[(L$bswap-L$011pic)+ecx] |
; xmm0 = block, xmm5 = byte-reversal mask (first 16 bytes of L$bswap; the
; aligned load relies on the "align 64" in front of that label),
; xmm2 = table[0].
| 992 movdqu xmm0,[eax] |
| 993 movdqa xmm5,[ecx] |
| 994 movups xmm2,[edx] |
; pshufb xmm0,xmm5   (reverse the byte order of the block)
| 995 db 102,15,56,0,197 |
; xmm4 = table[32] (precomputed half-sums for the Karatsuba middle term).
| 996 movups xmm4,[32+edx] |
| 997 movdqa xmm1,xmm0 |
| 998 pshufd xmm3,xmm0,78 |
| 999 pxor xmm3,xmm0 |
; pclmulqdq xmm0,xmm2,0x00   (low  * low)
| 1000 db 102,15,58,68,194,0 |
; pclmulqdq xmm1,xmm2,0x11   (high * high)
| 1001 db 102,15,58,68,202,17 |
; pclmulqdq xmm3,xmm4,0x00   (middle term)
| 1002 db 102,15,58,68,220,0 |
; Karatsuba fix-up and split of the middle term across xmm1:xmm0.
| 1003 xorps xmm3,xmm0 |
| 1004 xorps xmm3,xmm1 |
| 1005 movdqa xmm4,xmm3 |
| 1006 psrldq xmm3,8 |
| 1007 pslldq xmm4,8 |
| 1008 pxor xmm1,xmm3 |
| 1009 pxor xmm0,xmm4 |
; Two-phase reduction of xmm1:xmm0 into xmm0 (same sequence as in
; _gcm_init_clmul).
| 1010 movdqa xmm4,xmm0 |
| 1011 movdqa xmm3,xmm0 |
| 1012 psllq xmm0,5 |
| 1013 pxor xmm3,xmm0 |
| 1014 psllq xmm0,1 |
| 1015 pxor xmm0,xmm3 |
| 1016 psllq xmm0,57 |
| 1017 movdqa xmm3,xmm0 |
| 1018 pslldq xmm0,8 |
| 1019 psrldq xmm3,8 |
| 1020 pxor xmm0,xmm4 |
| 1021 pxor xmm1,xmm3 |
| 1022 movdqa xmm4,xmm0 |
| 1023 psrlq xmm0,1 |
| 1024 pxor xmm1,xmm4 |
| 1025 pxor xmm4,xmm0 |
| 1026 psrlq xmm0,5 |
| 1027 pxor xmm0,xmm4 |
| 1028 psrlq xmm0,1 |
| 1029 pxor xmm0,xmm1 |
; pshufb xmm0,xmm5   (restore memory byte order) and store the result.
| 1030 db 102,15,56,0,197 |
| 1031 movdqu [eax],xmm0 |
| 1032 ret |
;-----------------------------------------------------------------------
; _gcm_ghash_clmul - fold a buffer of 16-byte blocks into a 16-byte state
; using PCLMULQDQ, two blocks per main-loop iteration.
;
; Arguments are read from the stack (32-bit stack-argument convention):
;   arg1 : pointer to the 16-byte state; read and overwritten (eax).
;   arg2 : pointer to a key table; offsets 0, 16 and 32 are read (edx).
;   arg3 : pointer to the input data (esi).
;   arg4 : input length in bytes (ebx).
; NOTE(review): presumably arg2 is the table written by _gcm_init_clmul
; and the length is a non-zero multiple of 16 - confirm. A length of 0 is
; not special-cased: "sub ebx,16" is then non-zero and the two-block path
; runs.
;
; Length dispatch: exactly 16 bytes -> L$013odd_tail only; otherwise two
; blocks are preloaded, L$015mod_loop runs while more than 32 bytes
; remain, L$014even_tail finishes the pending pair, and a final single
; block (if any) falls through to L$013odd_tail.
;
; Preserves ebp, ebx, esi, edi; clobbers eax, ecx, edx, xmm0-xmm7, flags.
;-----------------------------------------------------------------------
| 1033 global _gcm_ghash_clmul |
| 1034 align 16 |
| 1035 _gcm_ghash_clmul: |
| 1036 L$_gcm_ghash_clmul_begin: |
| 1037 push ebp |
| 1038 push ebx |
| 1039 push esi |
| 1040 push edi |
; 4 pushes (16) + 4 return address = 20 => arg1..arg4 at [20..32+esp].
| 1041 mov eax,DWORD [20+esp] |
| 1042 mov edx,DWORD [24+esp] |
| 1043 mov esi,DWORD [28+esp] |
| 1044 mov ebx,DWORD [32+esp] |
; Position-independent address of L$bswap in ecx.
| 1045 call L$012pic |
| 1046 L$012pic: |
| 1047 pop ecx |
| 1048 lea ecx,[(L$bswap-L$012pic)+ecx] |
; xmm0 = state, xmm5 = byte-reversal mask, xmm2 = table[0].
| 1049 movdqu xmm0,[eax] |
| 1050 movdqa xmm5,[ecx] |
| 1051 movdqu xmm2,[edx] |
; pshufb xmm0,xmm5
| 1052 db 102,15,56,0,197 |
; Exactly one block? Then only the single-block tail is needed.
| 1053 sub ebx,16 |
| 1054 jz NEAR L$013odd_tail |
; Preload two input blocks: xmm3 = block 0, xmm6 = block 1.
| 1055 movdqu xmm3,[esi] |
| 1056 movdqu xmm6,[16+esi] |
; pshufb xmm3,xmm5
| 1057 db 102,15,56,0,221 |
; pshufb xmm6,xmm5
| 1058 db 102,15,56,0,245 |
; xmm5 is reused for table[32]; the mask is reloaded from [ecx] when needed.
| 1059 movdqu xmm5,[32+edx] |
; state ^= block 0; start multiplying block 1 by table[0]
; (xmm6 = low product, xmm7 = high product, xmm3 = middle term).
| 1060 pxor xmm0,xmm3 |
| 1061 pshufd xmm3,xmm6,78 |
| 1062 movdqa xmm7,xmm6 |
| 1063 pxor xmm3,xmm6 |
| 1064 lea esi,[32+esi] |
; pclmulqdq xmm6,xmm2,0x00
| 1065 db 102,15,58,68,242,0 |
; pclmulqdq xmm7,xmm2,0x11
| 1066 db 102,15,58,68,250,17 |
; pclmulqdq xmm3,xmm5,0x00
| 1067 db 102,15,58,68,221,0 |
; xmm2 = table[16], the multiplier for the state in the paired step.
| 1068 movups xmm2,[16+edx] |
| 1069 nop |
; More than 32 bytes left after this pair? If not, finish in even_tail.
| 1070 sub ebx,32 |
| 1071 jbe NEAR L$014even_tail |
| 1072 jmp NEAR L$015mod_loop |
| 1073 align 32 |
| 1074 L$015mod_loop: |
; Main loop: multiply the state by table[16], combine with the pending
; products of the previous second block (xmm6/xmm7/xmm3), reduce, and in
; the same instruction stream load/byte-swap the next two blocks and start
; the products for the new second block.
| 1075 pshufd xmm4,xmm0,78 |
| 1076 movdqa xmm1,xmm0 |
| 1077 pxor xmm4,xmm0 |
| 1078 nop |
; pclmulqdq xmm0,xmm2,0x00
| 1079 db 102,15,58,68,194,0 |
; pclmulqdq xmm1,xmm2,0x11
| 1080 db 102,15,58,68,202,17 |
; pclmulqdq xmm4,xmm5,0x10
| 1081 db 102,15,58,68,229,16 |
| 1082 movups xmm2,[edx] |
| 1083 xorps xmm0,xmm6 |
| 1084 movdqa xmm5,[ecx] |
| 1085 xorps xmm1,xmm7 |
| 1086 movdqu xmm7,[esi] |
| 1087 pxor xmm3,xmm0 |
| 1088 movdqu xmm6,[16+esi] |
| 1089 pxor xmm3,xmm1 |
; pshufb xmm7,xmm5
| 1090 db 102,15,56,0,253 |
| 1091 pxor xmm4,xmm3 |
| 1092 movdqa xmm3,xmm4 |
| 1093 psrldq xmm4,8 |
| 1094 pslldq xmm3,8 |
| 1095 pxor xmm1,xmm4 |
| 1096 pxor xmm0,xmm3 |
; pshufb xmm6,xmm5
| 1097 db 102,15,56,0,245 |
; Fold the next first block (xmm7) into the high half before reduction.
| 1098 pxor xmm1,xmm7 |
| 1099 movdqa xmm7,xmm6 |
| 1100 movdqa xmm4,xmm0 |
| 1101 movdqa xmm3,xmm0 |
| 1102 psllq xmm0,5 |
| 1103 pxor xmm3,xmm0 |
| 1104 psllq xmm0,1 |
| 1105 pxor xmm0,xmm3 |
; pclmulqdq xmm6,xmm2,0x00
| 1106 db 102,15,58,68,242,0 |
| 1107 movups xmm5,[32+edx] |
| 1108 psllq xmm0,57 |
| 1109 movdqa xmm3,xmm0 |
| 1110 pslldq xmm0,8 |
| 1111 psrldq xmm3,8 |
| 1112 pxor xmm0,xmm4 |
| 1113 pxor xmm1,xmm3 |
| 1114 pshufd xmm3,xmm7,78 |
| 1115 movdqa xmm4,xmm0 |
| 1116 psrlq xmm0,1 |
| 1117 pxor xmm3,xmm7 |
| 1118 pxor xmm1,xmm4 |
; pclmulqdq xmm7,xmm2,0x11
| 1119 db 102,15,58,68,250,17 |
| 1120 movups xmm2,[16+edx] |
| 1121 pxor xmm4,xmm0 |
| 1122 psrlq xmm0,5 |
| 1123 pxor xmm0,xmm4 |
| 1124 psrlq xmm0,1 |
| 1125 pxor xmm0,xmm1 |
; pclmulqdq xmm3,xmm5,0x00
| 1126 db 102,15,58,68,221,0 |
| 1127 lea esi,[32+esi] |
; Unsigned "more than 32 bytes were left" test.
| 1128 sub ebx,32 |
| 1129 ja NEAR L$015mod_loop |
| 1130 L$014even_tail: |
; Finish the pending pair: state * table[16] combined with the already
; computed second-block products, then reduce.
| 1131 pshufd xmm4,xmm0,78 |
| 1132 movdqa xmm1,xmm0 |
| 1133 pxor xmm4,xmm0 |
; pclmulqdq xmm0,xmm2,0x00
| 1134 db 102,15,58,68,194,0 |
; pclmulqdq xmm1,xmm2,0x11
| 1135 db 102,15,58,68,202,17 |
; pclmulqdq xmm4,xmm5,0x10
| 1136 db 102,15,58,68,229,16 |
; Reload the byte-reversal mask (xmm5 held table[32] until now).
| 1137 movdqa xmm5,[ecx] |
| 1138 xorps xmm0,xmm6 |
| 1139 xorps xmm1,xmm7 |
| 1140 pxor xmm3,xmm0 |
| 1141 pxor xmm3,xmm1 |
| 1142 pxor xmm4,xmm3 |
| 1143 movdqa xmm3,xmm4 |
| 1144 psrldq xmm4,8 |
| 1145 pslldq xmm3,8 |
| 1146 pxor xmm1,xmm4 |
| 1147 pxor xmm0,xmm3 |
| 1148 movdqa xmm4,xmm0 |
| 1149 movdqa xmm3,xmm0 |
| 1150 psllq xmm0,5 |
| 1151 pxor xmm3,xmm0 |
| 1152 psllq xmm0,1 |
| 1153 pxor xmm0,xmm3 |
| 1154 psllq xmm0,57 |
| 1155 movdqa xmm3,xmm0 |
| 1156 pslldq xmm0,8 |
| 1157 psrldq xmm3,8 |
| 1158 pxor xmm0,xmm4 |
| 1159 pxor xmm1,xmm3 |
| 1160 movdqa xmm4,xmm0 |
| 1161 psrlq xmm0,1 |
| 1162 pxor xmm1,xmm4 |
| 1163 pxor xmm4,xmm0 |
| 1164 psrlq xmm0,5 |
| 1165 pxor xmm0,xmm4 |
| 1166 psrlq xmm0,1 |
| 1167 pxor xmm0,xmm1 |
; ebx == 0 here means exactly one more block remains; any other value
; (it is -16 when nothing is left) means the input is exhausted.
| 1168 test ebx,ebx |
| 1169 jnz NEAR L$016done |
| 1170 movups xmm2,[edx] |
| 1171 L$013odd_tail: |
; Single block: state ^= block, then multiply by table[0] with the
; half-sum of xmm2 computed on the fly, and reduce.
| 1172 movdqu xmm3,[esi] |
; pshufb xmm3,xmm5
| 1173 db 102,15,56,0,221 |
| 1174 pxor xmm0,xmm3 |
| 1175 movdqa xmm1,xmm0 |
| 1176 pshufd xmm3,xmm0,78 |
| 1177 pshufd xmm4,xmm2,78 |
| 1178 pxor xmm3,xmm0 |
| 1179 pxor xmm4,xmm2 |
; pclmulqdq xmm0,xmm2,0x00
| 1180 db 102,15,58,68,194,0 |
; pclmulqdq xmm1,xmm2,0x11
| 1181 db 102,15,58,68,202,17 |
; pclmulqdq xmm3,xmm4,0x00
| 1182 db 102,15,58,68,220,0 |
| 1183 xorps xmm3,xmm0 |
| 1184 xorps xmm3,xmm1 |
| 1185 movdqa xmm4,xmm3 |
| 1186 psrldq xmm3,8 |
| 1187 pslldq xmm4,8 |
| 1188 pxor xmm1,xmm3 |
| 1189 pxor xmm0,xmm4 |
| 1190 movdqa xmm4,xmm0 |
| 1191 movdqa xmm3,xmm0 |
| 1192 psllq xmm0,5 |
| 1193 pxor xmm3,xmm0 |
| 1194 psllq xmm0,1 |
| 1195 pxor xmm0,xmm3 |
| 1196 psllq xmm0,57 |
| 1197 movdqa xmm3,xmm0 |
| 1198 pslldq xmm0,8 |
| 1199 psrldq xmm3,8 |
| 1200 pxor xmm0,xmm4 |
| 1201 pxor xmm1,xmm3 |
| 1202 movdqa xmm4,xmm0 |
| 1203 psrlq xmm0,1 |
| 1204 pxor xmm1,xmm4 |
| 1205 pxor xmm4,xmm0 |
| 1206 psrlq xmm0,5 |
| 1207 pxor xmm0,xmm4 |
| 1208 psrlq xmm0,1 |
| 1209 pxor xmm0,xmm1 |
| 1210 L$016done: |
; pshufb xmm0,xmm5   (restore memory byte order) and store the state.
| 1211 db 102,15,56,0,197 |
| 1212 movdqu [eax],xmm0 |
| 1213 pop edi |
| 1214 pop esi |
| 1215 pop ebx |
| 1216 pop ebp |
| 1217 ret |
; Constants for the PCLMULQDQ routines; 64-byte aligned so the movdqa
; loads from [ecx] are legal.
;   +0  : pshufb mask 15..0, reversing the byte order of a 16-byte value.
;   +16 : 128-bit constant (bytes 01 00 .. 00 C2) that _gcm_init_clmul xors
;         in conditionally after its 1-bit left shift.
; NOTE(review): the +16 constant is presumably the GHASH reduction
; polynomial in the representation used by this code - confirm.
| 1218 align 64 |
| 1219 L$bswap: |
| 1220 db 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 |
| 1221 db 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194 |
; 256 16-bit entries (32 rows of 8), read by _gcm_ghash_4bit_mmx as
; WORD [index*2 + L$rem_8bit] with an 8-bit index.
; NOTE(review): presumably the reduction values for the 8 bits shifted
; out of the accumulator per step - confirm against the generator script.
| 1222 align 64 |
| 1223 L$rem_8bit: |
| 1224 dw 0,450,900,582,1800,1738,1164,1358 |
| 1225 dw 3600,4050,3476,3158,2328,2266,2716,2910 |
| 1226 dw 7200,7650,8100,7782,6952,6890,6316,6510 |
| 1227 dw 4656,5106,4532,4214,5432,5370,5820,6014 |
| 1228 dw 14400,14722,15300,14854,16200,16010,15564,15630 |
| 1229 dw 13904,14226,13780,13334,12632,12442,13020,13086 |
| 1230 dw 9312,9634,10212,9766,9064,8874,8428,8494 |
| 1231 dw 10864,11186,10740,10294,11640,11450,12028,12094 |
| 1232 dw 28800,28994,29444,29382,30600,30282,29708,30158 |
| 1233 dw 32400,32594,32020,31958,31128,30810,31260,31710 |
| 1234 dw 27808,28002,28452,28390,27560,27242,26668,27118 |
| 1235 dw 25264,25458,24884,24822,26040,25722,26172,26622 |
| 1236 dw 18624,18690,19268,19078,20424,19978,19532,19854 |
| 1237 dw 18128,18194,17748,17558,16856,16410,16988,17310 |
| 1238 dw 21728,21794,22372,22182,21480,21034,20588,20910 |
| 1239 dw 23280,23346,22900,22710,24056,23610,24188,24510 |
| 1240 dw 57600,57538,57988,58182,58888,59338,58764,58446 |
| 1241 dw 61200,61138,60564,60758,59416,59866,60316,59998 |
| 1242 dw 64800,64738,65188,65382,64040,64490,63916,63598 |
| 1243 dw 62256,62194,61620,61814,62520,62970,63420,63102 |
| 1244 dw 55616,55426,56004,56070,56904,57226,56780,56334 |
| 1245 dw 55120,54930,54484,54550,53336,53658,54236,53790 |
| 1246 dw 50528,50338,50916,50982,49768,50090,49644,49198 |
| 1247 dw 52080,51890,51444,51510,52344,52666,53244,52798 |
| 1248 dw 37248,36930,37380,37830,38536,38730,38156,38094 |
| 1249 dw 40848,40530,39956,40406,39064,39258,39708,39646 |
| 1250 dw 36256,35938,36388,36838,35496,35690,35116,35054 |
| 1251 dw 33712,33394,32820,33270,33976,34170,34620,34558 |
| 1252 dw 43456,43010,43588,43910,44744,44810,44364,44174 |
| 1253 dw 42960,42514,42068,42390,41176,41242,41820,41630 |
| 1254 dw 46560,46114,46692,47014,45800,45866,45420,45230 |
| 1255 dw 48112,47666,47220,47542,48376,48442,49020,48830 |
; 16 entries of 8 bytes each (dword 0, dword value), read by
; _gcm_gmult_4bit_mmx as a qword at [nibble*8 + L$rem_4bit]. The non-zero
; dwords are the same 16 values that the *_4bit_x86 routines store into
; their on-stack table.
| 1256 align 64 |
| 1257 L$rem_4bit: |
| 1258 dd 0,0,0,471859200,0,943718400,0,610271232 |
| 1259 dd 0,1887436800,0,1822425088,0,1220542464,0,1423966208 |
| 1260 dd 0,3774873600,0,4246732800,0,3644850176,0,3311403008 |
| 1261 dd 0,2441084928,0,2376073216,0,2847932416,0,3051356160 |
; NUL-terminated ASCII identification string:
; "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>"
| 1262 db 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67 |
| 1263 db 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112 |
| 1264 db 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62 |
| 1265 db 0 |
OLD | NEW |