OLD | NEW |
(Empty) | |
| 1 #if defined(__i386__) |
| 2 .file "ghash-x86.S" |
| 3 .text |
# ---------------------------------------------------------------------------
# void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16])
# GHASH single-block multiply (Xi <- Xi * H) using the 4-bit windowed
# table method, pure IA-32 integer code (no MMX/SSE).
# NOTE(review): machine-generated (CRYPTOGAMS perlasm output, see banner at
# end of file) — do not hand-edit the instruction stream; regenerate instead.
# In:  104(%esp) = Xi (after 4 pushes + 84-byte frame; was 4(%esp)),
#      108(%esp) = Htable (was 8(%esp)).
# Frame: 0-15(%esp) copy of Xi, 16-79(%esp) 16-entry reduction table whose
#        constants match Lrem_4bit at the end of this file.
# ---------------------------------------------------------------------------
| 4 .globl _gcm_gmult_4bit_x86 |
| 5 .private_extern _gcm_gmult_4bit_x86 |
| 6 .align 4 |
| 7 _gcm_gmult_4bit_x86: |
| 8 L_gcm_gmult_4bit_x86_begin: |
| 9 pushl %ebp |
| 10 pushl %ebx |
| 11 pushl %esi |
| 12 pushl %edi |
| 13 subl $84,%esp |
| 14 movl 104(%esp),%edi |
| 15 movl 108(%esp),%esi |
# Load Xi into ebp:edx:ecx:ebx (four 32-bit words).
| 16 movl (%edi),%ebp |
| 17 movl 4(%edi),%edx |
| 18 movl 8(%edi),%ecx |
| 19 movl 12(%edi),%ebx |
# Build the rem_4bit reduction table on the stack at 16(%esp)
# (same constants as the Lrem_4bit data table below).
| 20 movl $0,16(%esp) |
| 21 movl $471859200,20(%esp) |
| 22 movl $943718400,24(%esp) |
| 23 movl $610271232,28(%esp) |
| 24 movl $1887436800,32(%esp) |
| 25 movl $1822425088,36(%esp) |
| 26 movl $1220542464,40(%esp) |
| 27 movl $1423966208,44(%esp) |
| 28 movl $3774873600,48(%esp) |
| 29 movl $4246732800,52(%esp) |
| 30 movl $3644850176,56(%esp) |
| 31 movl $3311403008,60(%esp) |
| 32 movl $2441084928,64(%esp) |
| 33 movl $2376073216,68(%esp) |
| 34 movl $2847932416,72(%esp) |
| 35 movl $3051356160,76(%esp) |
# Stash Xi bytes at 0-15(%esp) so the loop can index nibbles by byte.
| 36 movl %ebp,(%esp) |
| 37 movl %edx,4(%esp) |
| 38 movl %ecx,8(%esp) |
| 39 movl %ebx,12(%esp) |
# First table lookup: high nibble of the last byte selects a Htable row
# (each row is 16 bytes, hence the *16 scaling via shr/and to 0..240).
| 40 shrl $20,%ebx |
| 41 andl $240,%ebx |
| 42 movl 4(%esi,%ebx,1),%ebp |
| 43 movl (%esi,%ebx,1),%edx |
| 44 movl 12(%esi,%ebx,1),%ecx |
| 45 movl 8(%esi,%ebx,1),%ebx |
| 46 xorl %eax,%eax |
| 47 movl $15,%edi |
| 48 jmp L000x86_loop |
| 49 .align 4,0x90 |
# Main loop: edi counts Xi bytes 15..0; each iteration handles two 4-bit
# digits.  The shrdl chain shifts the 128-bit value ebp:edx:ecx:ebx right
# by 4, folding the dropped nibble back in via the stack reduction table.
| 50 L000x86_loop: |
| 51 movb %bl,%al |
| 52 shrdl $4,%ecx,%ebx |
| 53 andb $15,%al |
| 54 shrdl $4,%edx,%ecx |
| 55 shrdl $4,%ebp,%edx |
| 56 shrl $4,%ebp |
| 57 xorl 16(%esp,%eax,4),%ebp |
| 58 movb (%esp,%edi,1),%al |
| 59 andb $240,%al |
| 60 xorl 8(%esi,%eax,1),%ebx |
| 61 xorl 12(%esi,%eax,1),%ecx |
| 62 xorl (%esi,%eax,1),%edx |
| 63 xorl 4(%esi,%eax,1),%ebp |
| 64 decl %edi |
| 65 js L001x86_break |
# Second digit of the byte (low nibble, shifted up to index a 16-byte row).
| 66 movb %bl,%al |
| 67 shrdl $4,%ecx,%ebx |
| 68 andb $15,%al |
| 69 shrdl $4,%edx,%ecx |
| 70 shrdl $4,%ebp,%edx |
| 71 shrl $4,%ebp |
| 72 xorl 16(%esp,%eax,4),%ebp |
| 73 movb (%esp,%edi,1),%al |
| 74 shlb $4,%al |
| 75 xorl 8(%esi,%eax,1),%ebx |
| 76 xorl 12(%esi,%eax,1),%ecx |
| 77 xorl (%esi,%eax,1),%edx |
| 78 xorl 4(%esi,%eax,1),%ebp |
| 79 jmp L000x86_loop |
| 80 .align 4,0x90 |
| 81 L001x86_break: |
# Byte-swap result back to big-endian layout and store into Xi.
| 82 bswap %ebx |
| 83 bswap %ecx |
| 84 bswap %edx |
| 85 bswap %ebp |
| 86 movl 104(%esp),%edi |
| 87 movl %ebx,12(%edi) |
| 88 movl %ecx,8(%edi) |
| 89 movl %edx,4(%edi) |
| 90 movl %ebp,(%edi) |
| 91 addl $84,%esp |
| 92 popl %edi |
| 93 popl %esi |
| 94 popl %ebx |
| 95 popl %ebp |
| 96 ret |
# ---------------------------------------------------------------------------
# void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16],
#                         const u8 *inp, size_t len)
# GHASH over a whole buffer: for each 16-byte input block, Xi ^= block,
# then Xi <- Xi * H with the same 4-bit table kernel as gcm_gmult_4bit_x86.
# NOTE(review): machine-generated (CRYPTOGAMS) — regenerate, don't hand-edit.
# In (after 4 pushes + 84-byte frame): 104(%esp)=Xi, 108(%esp)=Htable,
#     112(%esp)=inp, 116(%esp)=len; 116(%esp) is rewritten to inp+len and
#     used as the end pointer for the outer loop.
# ---------------------------------------------------------------------------
| 97 .globl _gcm_ghash_4bit_x86 |
| 98 .private_extern _gcm_ghash_4bit_x86 |
| 99 .align 4 |
| 100 _gcm_ghash_4bit_x86: |
| 101 L_gcm_ghash_4bit_x86_begin: |
| 102 pushl %ebp |
| 103 pushl %ebx |
| 104 pushl %esi |
| 105 pushl %edi |
| 106 subl $84,%esp |
| 107 movl 104(%esp),%ebx |
| 108 movl 108(%esp),%esi |
| 109 movl 112(%esp),%edi |
| 110 movl 116(%esp),%ecx |
# Turn len into an end pointer: 116(%esp) = inp + len.
| 111 addl %edi,%ecx |
| 112 movl %ecx,116(%esp) |
| 113 movl (%ebx),%ebp |
| 114 movl 4(%ebx),%edx |
| 115 movl 8(%ebx),%ecx |
| 116 movl 12(%ebx),%ebx |
# rem_4bit reduction table on the stack (constants match Lrem_4bit below).
| 117 movl $0,16(%esp) |
| 118 movl $471859200,20(%esp) |
| 119 movl $943718400,24(%esp) |
| 120 movl $610271232,28(%esp) |
| 121 movl $1887436800,32(%esp) |
| 122 movl $1822425088,36(%esp) |
| 123 movl $1220542464,40(%esp) |
| 124 movl $1423966208,44(%esp) |
| 125 movl $3774873600,48(%esp) |
| 126 movl $4246732800,52(%esp) |
| 127 movl $3644850176,56(%esp) |
| 128 movl $3311403008,60(%esp) |
| 129 movl $2441084928,64(%esp) |
| 130 movl $2376073216,68(%esp) |
| 131 movl $2847932416,72(%esp) |
| 132 movl $3051356160,76(%esp) |
| 133 .align 4,0x90 |
# Outer loop: fold the next 16-byte input block into Xi, then multiply.
| 134 L002x86_outer_loop: |
| 135 xorl 12(%edi),%ebx |
| 136 xorl 8(%edi),%ecx |
| 137 xorl 4(%edi),%edx |
| 138 xorl (%edi),%ebp |
| 139 movl %ebx,12(%esp) |
| 140 movl %ecx,8(%esp) |
| 141 movl %edx,4(%esp) |
| 142 movl %ebp,(%esp) |
# High nibble of last byte selects the first Htable row (rows are 16 bytes).
| 143 shrl $20,%ebx |
| 144 andl $240,%ebx |
| 145 movl 4(%esi,%ebx,1),%ebp |
| 146 movl (%esi,%ebx,1),%edx |
| 147 movl 12(%esi,%ebx,1),%ecx |
| 148 movl 8(%esi,%ebx,1),%ebx |
| 149 xorl %eax,%eax |
| 150 movl $15,%edi |
| 151 jmp L003x86_loop |
| 152 .align 4,0x90 |
# Inner loop: identical nibble-at-a-time kernel to gcm_gmult_4bit_x86
# (128-bit right shift by 4 via shrdl chain + table lookups).
| 153 L003x86_loop: |
| 154 movb %bl,%al |
| 155 shrdl $4,%ecx,%ebx |
| 156 andb $15,%al |
| 157 shrdl $4,%edx,%ecx |
| 158 shrdl $4,%ebp,%edx |
| 159 shrl $4,%ebp |
| 160 xorl 16(%esp,%eax,4),%ebp |
| 161 movb (%esp,%edi,1),%al |
| 162 andb $240,%al |
| 163 xorl 8(%esi,%eax,1),%ebx |
| 164 xorl 12(%esi,%eax,1),%ecx |
| 165 xorl (%esi,%eax,1),%edx |
| 166 xorl 4(%esi,%eax,1),%ebp |
| 167 decl %edi |
| 168 js L004x86_break |
| 169 movb %bl,%al |
| 170 shrdl $4,%ecx,%ebx |
| 171 andb $15,%al |
| 172 shrdl $4,%edx,%ecx |
| 173 shrdl $4,%ebp,%edx |
| 174 shrl $4,%ebp |
| 175 xorl 16(%esp,%eax,4),%ebp |
| 176 movb (%esp,%edi,1),%al |
| 177 shlb $4,%al |
| 178 xorl 8(%esi,%eax,1),%ebx |
| 179 xorl 12(%esi,%eax,1),%ecx |
| 180 xorl (%esi,%eax,1),%edx |
| 181 xorl 4(%esi,%eax,1),%ebp |
| 182 jmp L003x86_loop |
| 183 .align 4,0x90 |
| 184 L004x86_break: |
| 185 bswap %ebx |
| 186 bswap %ecx |
| 187 bswap %edx |
| 188 bswap %ebp |
# Advance input pointer by one block; loop while edi < end pointer.
| 189 movl 112(%esp),%edi |
| 190 leal 16(%edi),%edi |
| 191 cmpl 116(%esp),%edi |
| 192 movl %edi,112(%esp) |
| 193 jb L002x86_outer_loop |
# Store final Xi (big-endian words, as after bswap).
| 194 movl 104(%esp),%edi |
| 195 movl %ebx,12(%edi) |
| 196 movl %ecx,8(%edi) |
| 197 movl %edx,4(%edi) |
| 198 movl %ebp,(%edi) |
| 199 addl $84,%esp |
| 200 popl %edi |
| 201 popl %esi |
| 202 popl %ebx |
| 203 popl %ebp |
| 204 ret |
# ---------------------------------------------------------------------------
# void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16])
# GHASH single-block multiply, MMX variant: the 128-bit accumulator lives
# in mm0/mm1, and the per-nibble reduction uses the Lrem_4bit table located
# PC-relatively (call/pop %eax idiom — this is PIC 32-bit code).
# NOTE(review): machine-generated (CRYPTOGAMS) — regenerate, don't hand-edit.
# In: 20(%esp)=Xi, 24(%esp)=Htable (after the four pushes).
# ---------------------------------------------------------------------------
| 205 .globl _gcm_gmult_4bit_mmx |
| 206 .private_extern _gcm_gmult_4bit_mmx |
| 207 .align 4 |
| 208 _gcm_gmult_4bit_mmx: |
| 209 L_gcm_gmult_4bit_mmx_begin: |
| 210 pushl %ebp |
| 211 pushl %ebx |
| 212 pushl %esi |
| 213 pushl %edi |
| 214 movl 20(%esp),%edi |
| 215 movl 24(%esp),%esi |
# PIC trick: call/pop to get EIP, then address Lrem_4bit relative to it.
| 216 call L005pic_point |
| 217 L005pic_point: |
| 218 popl %eax |
| 219 leal Lrem_4bit-L005pic_point(%eax),%eax |
# Seed from the last byte of Xi: low nibble (<<4) and high nibble (&240)
# each index a 16-byte Htable row.
| 220 movzbl 15(%edi),%ebx |
| 221 xorl %ecx,%ecx |
| 222 movl %ebx,%edx |
| 223 movb %dl,%cl |
| 224 movl $14,%ebp |
| 225 shlb $4,%cl |
| 226 andl $240,%edx |
| 227 movq 8(%esi,%ecx,1),%mm0 |
| 228 movq (%esi,%ecx,1),%mm1 |
| 229 movd %mm0,%ebx |
| 230 jmp L006mmx_loop |
| 231 .align 4,0x90 |
# Per-nibble loop: shift mm1:mm0 right by 4 (psrlq + psllq $60 carry),
# fold the dropped nibble via Lrem_4bit, xor in the next Htable row.
# ebp counts Xi bytes 14..0; two nibbles per iteration.
| 232 L006mmx_loop: |
| 233 psrlq $4,%mm0 |
| 234 andl $15,%ebx |
| 235 movq %mm1,%mm2 |
| 236 psrlq $4,%mm1 |
| 237 pxor 8(%esi,%edx,1),%mm0 |
| 238 movb (%edi,%ebp,1),%cl |
| 239 psllq $60,%mm2 |
| 240 pxor (%eax,%ebx,8),%mm1 |
| 241 decl %ebp |
| 242 movd %mm0,%ebx |
| 243 pxor (%esi,%edx,1),%mm1 |
| 244 movl %ecx,%edx |
| 245 pxor %mm2,%mm0 |
| 246 js L007mmx_break |
| 247 shlb $4,%cl |
| 248 andl $15,%ebx |
| 249 psrlq $4,%mm0 |
| 250 andl $240,%edx |
| 251 movq %mm1,%mm2 |
| 252 psrlq $4,%mm1 |
| 253 pxor 8(%esi,%ecx,1),%mm0 |
| 254 psllq $60,%mm2 |
| 255 pxor (%eax,%ebx,8),%mm1 |
| 256 movd %mm0,%ebx |
| 257 pxor (%esi,%ecx,1),%mm1 |
| 258 pxor %mm2,%mm0 |
| 259 jmp L006mmx_loop |
| 260 .align 4,0x90 |
# Tail: finish the last two nibbles outside the loop.
| 261 L007mmx_break: |
| 262 shlb $4,%cl |
| 263 andl $15,%ebx |
| 264 psrlq $4,%mm0 |
| 265 andl $240,%edx |
| 266 movq %mm1,%mm2 |
| 267 psrlq $4,%mm1 |
| 268 pxor 8(%esi,%ecx,1),%mm0 |
| 269 psllq $60,%mm2 |
| 270 pxor (%eax,%ebx,8),%mm1 |
| 271 movd %mm0,%ebx |
| 272 pxor (%esi,%ecx,1),%mm1 |
| 273 pxor %mm2,%mm0 |
| 274 psrlq $4,%mm0 |
| 275 andl $15,%ebx |
| 276 movq %mm1,%mm2 |
| 277 psrlq $4,%mm1 |
| 278 pxor 8(%esi,%edx,1),%mm0 |
| 279 psllq $60,%mm2 |
| 280 pxor (%eax,%ebx,8),%mm1 |
| 281 movd %mm0,%ebx |
| 282 pxor (%esi,%edx,1),%mm1 |
| 283 pxor %mm2,%mm0 |
# Extract the four 32-bit words, byte-swap to big-endian, store to Xi.
| 284 psrlq $32,%mm0 |
| 285 movd %mm1,%edx |
| 286 psrlq $32,%mm1 |
| 287 movd %mm0,%ecx |
| 288 movd %mm1,%ebp |
| 289 bswap %ebx |
| 290 bswap %edx |
| 291 bswap %ecx |
| 292 bswap %ebp |
# emms restores the x87 state for any caller FP code.
| 293 emms |
| 294 movl %ebx,12(%edi) |
| 295 movl %edx,4(%edi) |
| 296 movl %ecx,8(%edi) |
| 297 movl %ebp,(%edi) |
| 298 popl %edi |
| 299 popl %esi |
| 300 popl %ebx |
| 301 popl %ebp |
| 302 ret |
# ---------------------------------------------------------------------------
# void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16],
#                         const u8 *inp, size_t len)
# GHASH over a buffer, MMX variant using an 8-bit remainder table
# (Lrem_8bit, addressed PIC via call/pop).  The function first unpacks
# Htable into two stack-resident, pre-shifted copies (at 16(%esp) and
# 144(%esp) for the low/high halves, plus 272/400(%esp) for the
# nibble-shifted forms) and a 16-byte nibble map at 0(%esp), then runs a
# byte-at-a-time outer/inner loop.
# NOTE(review): machine-generated (CRYPTOGAMS), heavily software-pipelined —
# do not reorder by hand; regenerate from the perlasm source instead.
# In (after 4 pushes): 20(%esp)=Xi, 24(%esp)=Htable, 28(%esp)=inp,
#     32(%esp)=len.  Original %esp is saved at 556(%esp) because the frame
#     is realigned to 64 bytes (andl $-64,%esp).
# ---------------------------------------------------------------------------
| 303 .globl _gcm_ghash_4bit_mmx |
| 304 .private_extern _gcm_ghash_4bit_mmx |
| 305 .align 4 |
| 306 _gcm_ghash_4bit_mmx: |
| 307 L_gcm_ghash_4bit_mmx_begin: |
| 308 pushl %ebp |
| 309 pushl %ebx |
| 310 pushl %esi |
| 311 pushl %edi |
| 312 movl 20(%esp),%eax |
| 313 movl 24(%esp),%ebx |
| 314 movl 28(%esp),%ecx |
| 315 movl 32(%esp),%edx |
| 316 movl %esp,%ebp |
| 317 call L008pic_point |
| 318 L008pic_point: |
| 319 popl %esi |
| 320 leal Lrem_8bit-L008pic_point(%esi),%esi |
# Carve a 544+16 byte frame aligned to 64; keep Xi ptr, end ptr and the
# caller's %esp in the frame (544/552/556).
| 321 subl $544,%esp |
| 322 andl $-64,%esp |
| 323 subl $16,%esp |
| 324 addl %ecx,%edx |
| 325 movl %eax,544(%esp) |
| 326 movl %edx,552(%esp) |
| 327 movl %ebp,556(%esp) |
# Precompute: walk Htable (%ebx biased by +128) and lay out straight and
# 4-bit-right-shifted copies of each entry at %edi (144(%esp)) and
# %ebp (400(%esp)); the dropped low nibbles (<<4) go to the byte map at
# 0-15(%esp).  Fully unrolled, 16 entries, software-pipelined.
| 328 addl $128,%ebx |
| 329 leal 144(%esp),%edi |
| 330 leal 400(%esp),%ebp |
| 331 movl -120(%ebx),%edx |
| 332 movq -120(%ebx),%mm0 |
| 333 movq -128(%ebx),%mm3 |
| 334 shll $4,%edx |
| 335 movb %dl,(%esp) |
| 336 movl -104(%ebx),%edx |
| 337 movq -104(%ebx),%mm2 |
| 338 movq -112(%ebx),%mm5 |
| 339 movq %mm0,-128(%edi) |
| 340 psrlq $4,%mm0 |
| 341 movq %mm3,(%edi) |
| 342 movq %mm3,%mm7 |
| 343 psrlq $4,%mm3 |
| 344 shll $4,%edx |
| 345 movb %dl,1(%esp) |
| 346 movl -88(%ebx),%edx |
| 347 movq -88(%ebx),%mm1 |
| 348 psllq $60,%mm7 |
| 349 movq -96(%ebx),%mm4 |
| 350 por %mm7,%mm0 |
| 351 movq %mm2,-120(%edi) |
| 352 psrlq $4,%mm2 |
| 353 movq %mm5,8(%edi) |
| 354 movq %mm5,%mm6 |
| 355 movq %mm0,-128(%ebp) |
| 356 psrlq $4,%mm5 |
| 357 movq %mm3,(%ebp) |
| 358 shll $4,%edx |
| 359 movb %dl,2(%esp) |
| 360 movl -72(%ebx),%edx |
| 361 movq -72(%ebx),%mm0 |
| 362 psllq $60,%mm6 |
| 363 movq -80(%ebx),%mm3 |
| 364 por %mm6,%mm2 |
| 365 movq %mm1,-112(%edi) |
| 366 psrlq $4,%mm1 |
| 367 movq %mm4,16(%edi) |
| 368 movq %mm4,%mm7 |
| 369 movq %mm2,-120(%ebp) |
| 370 psrlq $4,%mm4 |
| 371 movq %mm5,8(%ebp) |
| 372 shll $4,%edx |
| 373 movb %dl,3(%esp) |
| 374 movl -56(%ebx),%edx |
| 375 movq -56(%ebx),%mm2 |
| 376 psllq $60,%mm7 |
| 377 movq -64(%ebx),%mm5 |
| 378 por %mm7,%mm1 |
| 379 movq %mm0,-104(%edi) |
| 380 psrlq $4,%mm0 |
| 381 movq %mm3,24(%edi) |
| 382 movq %mm3,%mm6 |
| 383 movq %mm1,-112(%ebp) |
| 384 psrlq $4,%mm3 |
| 385 movq %mm4,16(%ebp) |
| 386 shll $4,%edx |
| 387 movb %dl,4(%esp) |
| 388 movl -40(%ebx),%edx |
| 389 movq -40(%ebx),%mm1 |
| 390 psllq $60,%mm6 |
| 391 movq -48(%ebx),%mm4 |
| 392 por %mm6,%mm0 |
| 393 movq %mm2,-96(%edi) |
| 394 psrlq $4,%mm2 |
| 395 movq %mm5,32(%edi) |
| 396 movq %mm5,%mm7 |
| 397 movq %mm0,-104(%ebp) |
| 398 psrlq $4,%mm5 |
| 399 movq %mm3,24(%ebp) |
| 400 shll $4,%edx |
| 401 movb %dl,5(%esp) |
| 402 movl -24(%ebx),%edx |
| 403 movq -24(%ebx),%mm0 |
| 404 psllq $60,%mm7 |
| 405 movq -32(%ebx),%mm3 |
| 406 por %mm7,%mm2 |
| 407 movq %mm1,-88(%edi) |
| 408 psrlq $4,%mm1 |
| 409 movq %mm4,40(%edi) |
| 410 movq %mm4,%mm6 |
| 411 movq %mm2,-96(%ebp) |
| 412 psrlq $4,%mm4 |
| 413 movq %mm5,32(%ebp) |
| 414 shll $4,%edx |
| 415 movb %dl,6(%esp) |
| 416 movl -8(%ebx),%edx |
| 417 movq -8(%ebx),%mm2 |
| 418 psllq $60,%mm6 |
| 419 movq -16(%ebx),%mm5 |
| 420 por %mm6,%mm1 |
| 421 movq %mm0,-80(%edi) |
| 422 psrlq $4,%mm0 |
| 423 movq %mm3,48(%edi) |
| 424 movq %mm3,%mm7 |
| 425 movq %mm1,-88(%ebp) |
| 426 psrlq $4,%mm3 |
| 427 movq %mm4,40(%ebp) |
| 428 shll $4,%edx |
| 429 movb %dl,7(%esp) |
| 430 movl 8(%ebx),%edx |
| 431 movq 8(%ebx),%mm1 |
| 432 psllq $60,%mm7 |
| 433 movq (%ebx),%mm4 |
| 434 por %mm7,%mm0 |
| 435 movq %mm2,-72(%edi) |
| 436 psrlq $4,%mm2 |
| 437 movq %mm5,56(%edi) |
| 438 movq %mm5,%mm6 |
| 439 movq %mm0,-80(%ebp) |
| 440 psrlq $4,%mm5 |
| 441 movq %mm3,48(%ebp) |
| 442 shll $4,%edx |
| 443 movb %dl,8(%esp) |
| 444 movl 24(%ebx),%edx |
| 445 movq 24(%ebx),%mm0 |
| 446 psllq $60,%mm6 |
| 447 movq 16(%ebx),%mm3 |
| 448 por %mm6,%mm2 |
| 449 movq %mm1,-64(%edi) |
| 450 psrlq $4,%mm1 |
| 451 movq %mm4,64(%edi) |
| 452 movq %mm4,%mm7 |
| 453 movq %mm2,-72(%ebp) |
| 454 psrlq $4,%mm4 |
| 455 movq %mm5,56(%ebp) |
| 456 shll $4,%edx |
| 457 movb %dl,9(%esp) |
| 458 movl 40(%ebx),%edx |
| 459 movq 40(%ebx),%mm2 |
| 460 psllq $60,%mm7 |
| 461 movq 32(%ebx),%mm5 |
| 462 por %mm7,%mm1 |
| 463 movq %mm0,-56(%edi) |
| 464 psrlq $4,%mm0 |
| 465 movq %mm3,72(%edi) |
| 466 movq %mm3,%mm6 |
| 467 movq %mm1,-64(%ebp) |
| 468 psrlq $4,%mm3 |
| 469 movq %mm4,64(%ebp) |
| 470 shll $4,%edx |
| 471 movb %dl,10(%esp) |
| 472 movl 56(%ebx),%edx |
| 473 movq 56(%ebx),%mm1 |
| 474 psllq $60,%mm6 |
| 475 movq 48(%ebx),%mm4 |
| 476 por %mm6,%mm0 |
| 477 movq %mm2,-48(%edi) |
| 478 psrlq $4,%mm2 |
| 479 movq %mm5,80(%edi) |
| 480 movq %mm5,%mm7 |
| 481 movq %mm0,-56(%ebp) |
| 482 psrlq $4,%mm5 |
| 483 movq %mm3,72(%ebp) |
| 484 shll $4,%edx |
| 485 movb %dl,11(%esp) |
| 486 movl 72(%ebx),%edx |
| 487 movq 72(%ebx),%mm0 |
| 488 psllq $60,%mm7 |
| 489 movq 64(%ebx),%mm3 |
| 490 por %mm7,%mm2 |
| 491 movq %mm1,-40(%edi) |
| 492 psrlq $4,%mm1 |
| 493 movq %mm4,88(%edi) |
| 494 movq %mm4,%mm6 |
| 495 movq %mm2,-48(%ebp) |
| 496 psrlq $4,%mm4 |
| 497 movq %mm5,80(%ebp) |
| 498 shll $4,%edx |
| 499 movb %dl,12(%esp) |
| 500 movl 88(%ebx),%edx |
| 501 movq 88(%ebx),%mm2 |
| 502 psllq $60,%mm6 |
| 503 movq 80(%ebx),%mm5 |
| 504 por %mm6,%mm1 |
| 505 movq %mm0,-32(%edi) |
| 506 psrlq $4,%mm0 |
| 507 movq %mm3,96(%edi) |
| 508 movq %mm3,%mm7 |
| 509 movq %mm1,-40(%ebp) |
| 510 psrlq $4,%mm3 |
| 511 movq %mm4,88(%ebp) |
| 512 shll $4,%edx |
| 513 movb %dl,13(%esp) |
| 514 movl 104(%ebx),%edx |
| 515 movq 104(%ebx),%mm1 |
| 516 psllq $60,%mm7 |
| 517 movq 96(%ebx),%mm4 |
| 518 por %mm7,%mm0 |
| 519 movq %mm2,-24(%edi) |
| 520 psrlq $4,%mm2 |
| 521 movq %mm5,104(%edi) |
| 522 movq %mm5,%mm6 |
| 523 movq %mm0,-32(%ebp) |
| 524 psrlq $4,%mm5 |
| 525 movq %mm3,96(%ebp) |
| 526 shll $4,%edx |
| 527 movb %dl,14(%esp) |
| 528 movl 120(%ebx),%edx |
| 529 movq 120(%ebx),%mm0 |
| 530 psllq $60,%mm6 |
| 531 movq 112(%ebx),%mm3 |
| 532 por %mm6,%mm2 |
| 533 movq %mm1,-16(%edi) |
| 534 psrlq $4,%mm1 |
| 535 movq %mm4,112(%edi) |
| 536 movq %mm4,%mm7 |
| 537 movq %mm2,-24(%ebp) |
| 538 psrlq $4,%mm4 |
| 539 movq %mm5,104(%ebp) |
| 540 shll $4,%edx |
| 541 movb %dl,15(%esp) |
# Drain the software pipeline for the last two table entries.
| 542 psllq $60,%mm7 |
| 543 por %mm7,%mm1 |
| 544 movq %mm0,-8(%edi) |
| 545 psrlq $4,%mm0 |
| 546 movq %mm3,120(%edi) |
| 547 movq %mm3,%mm6 |
| 548 movq %mm1,-16(%ebp) |
| 549 psrlq $4,%mm3 |
| 550 movq %mm4,112(%ebp) |
| 551 psllq $60,%mm6 |
| 552 por %mm6,%mm0 |
| 553 movq %mm0,-8(%ebp) |
| 554 movq %mm3,120(%ebp) |
# Load Xi: low half in mm6, high words in ebx/edx.
| 555 movq (%eax),%mm6 |
| 556 movl 8(%eax),%ebx |
| 557 movl 12(%eax),%edx |
| 558 .align 4,0x90 |
# Outer loop: xor in the next 16-byte input block, then perform the
# multiply one input byte at a time (roll $8 walks %edx through the
# block; 536/532/528(%esp) reload the next 32-bit words) with the
# Lrem_8bit table (pinsrw) handling the reduction.
| 559 L009outer: |
| 560 xorl 12(%ecx),%edx |
| 561 xorl 8(%ecx),%ebx |
| 562 pxor (%ecx),%mm6 |
| 563 leal 16(%ecx),%ecx |
| 564 movl %ebx,536(%esp) |
| 565 movq %mm6,528(%esp) |
| 566 movl %ecx,548(%esp) |
| 567 xorl %eax,%eax |
| 568 roll $8,%edx |
| 569 movb %dl,%al |
| 570 movl %eax,%ebp |
| 571 andb $15,%al |
| 572 shrl $4,%ebp |
| 573 pxor %mm0,%mm0 |
| 574 roll $8,%edx |
| 575 pxor %mm1,%mm1 |
| 576 pxor %mm2,%mm2 |
| 577 movq 16(%esp,%eax,8),%mm7 |
| 578 movq 144(%esp,%eax,8),%mm6 |
| 579 movb %dl,%al |
| 580 movd %mm7,%ebx |
| 581 psrlq $8,%mm7 |
| 582 movq %mm6,%mm3 |
| 583 movl %eax,%edi |
| 584 psrlq $8,%mm6 |
| 585 pxor 272(%esp,%ebp,8),%mm7 |
| 586 andb $15,%al |
| 587 psllq $56,%mm3 |
| 588 shrl $4,%edi |
| 589 pxor 16(%esp,%eax,8),%mm7 |
| 590 roll $8,%edx |
| 591 pxor 144(%esp,%eax,8),%mm6 |
| 592 pxor %mm3,%mm7 |
| 593 pxor 400(%esp,%ebp,8),%mm6 |
| 594 xorb (%esp,%ebp,1),%bl |
| 595 movb %dl,%al |
| 596 movd %mm7,%ecx |
| 597 movzbl %bl,%ebx |
| 598 psrlq $8,%mm7 |
| 599 movq %mm6,%mm3 |
| 600 movl %eax,%ebp |
| 601 psrlq $8,%mm6 |
| 602 pxor 272(%esp,%edi,8),%mm7 |
| 603 andb $15,%al |
| 604 psllq $56,%mm3 |
| 605 shrl $4,%ebp |
| 606 pinsrw $2,(%esi,%ebx,2),%mm2 |
| 607 pxor 16(%esp,%eax,8),%mm7 |
| 608 roll $8,%edx |
| 609 pxor 144(%esp,%eax,8),%mm6 |
| 610 pxor %mm3,%mm7 |
| 611 pxor 400(%esp,%edi,8),%mm6 |
| 612 xorb (%esp,%edi,1),%cl |
| 613 movb %dl,%al |
| 614 movl 536(%esp),%edx |
| 615 movd %mm7,%ebx |
| 616 movzbl %cl,%ecx |
| 617 psrlq $8,%mm7 |
| 618 movq %mm6,%mm3 |
| 619 movl %eax,%edi |
| 620 psrlq $8,%mm6 |
| 621 pxor 272(%esp,%ebp,8),%mm7 |
| 622 andb $15,%al |
| 623 psllq $56,%mm3 |
| 624 pxor %mm2,%mm6 |
| 625 shrl $4,%edi |
| 626 pinsrw $2,(%esi,%ecx,2),%mm1 |
| 627 pxor 16(%esp,%eax,8),%mm7 |
| 628 roll $8,%edx |
| 629 pxor 144(%esp,%eax,8),%mm6 |
| 630 pxor %mm3,%mm7 |
| 631 pxor 400(%esp,%ebp,8),%mm6 |
| 632 xorb (%esp,%ebp,1),%bl |
| 633 movb %dl,%al |
| 634 movd %mm7,%ecx |
| 635 movzbl %bl,%ebx |
| 636 psrlq $8,%mm7 |
| 637 movq %mm6,%mm3 |
| 638 movl %eax,%ebp |
| 639 psrlq $8,%mm6 |
| 640 pxor 272(%esp,%edi,8),%mm7 |
| 641 andb $15,%al |
| 642 psllq $56,%mm3 |
| 643 pxor %mm1,%mm6 |
| 644 shrl $4,%ebp |
| 645 pinsrw $2,(%esi,%ebx,2),%mm0 |
| 646 pxor 16(%esp,%eax,8),%mm7 |
| 647 roll $8,%edx |
| 648 pxor 144(%esp,%eax,8),%mm6 |
| 649 pxor %mm3,%mm7 |
| 650 pxor 400(%esp,%edi,8),%mm6 |
| 651 xorb (%esp,%edi,1),%cl |
| 652 movb %dl,%al |
| 653 movd %mm7,%ebx |
| 654 movzbl %cl,%ecx |
| 655 psrlq $8,%mm7 |
| 656 movq %mm6,%mm3 |
| 657 movl %eax,%edi |
| 658 psrlq $8,%mm6 |
| 659 pxor 272(%esp,%ebp,8),%mm7 |
| 660 andb $15,%al |
| 661 psllq $56,%mm3 |
| 662 pxor %mm0,%mm6 |
| 663 shrl $4,%edi |
| 664 pinsrw $2,(%esi,%ecx,2),%mm2 |
| 665 pxor 16(%esp,%eax,8),%mm7 |
| 666 roll $8,%edx |
| 667 pxor 144(%esp,%eax,8),%mm6 |
| 668 pxor %mm3,%mm7 |
| 669 pxor 400(%esp,%ebp,8),%mm6 |
| 670 xorb (%esp,%ebp,1),%bl |
| 671 movb %dl,%al |
| 672 movd %mm7,%ecx |
| 673 movzbl %bl,%ebx |
| 674 psrlq $8,%mm7 |
| 675 movq %mm6,%mm3 |
| 676 movl %eax,%ebp |
| 677 psrlq $8,%mm6 |
| 678 pxor 272(%esp,%edi,8),%mm7 |
| 679 andb $15,%al |
| 680 psllq $56,%mm3 |
| 681 pxor %mm2,%mm6 |
| 682 shrl $4,%ebp |
| 683 pinsrw $2,(%esi,%ebx,2),%mm1 |
| 684 pxor 16(%esp,%eax,8),%mm7 |
| 685 roll $8,%edx |
| 686 pxor 144(%esp,%eax,8),%mm6 |
| 687 pxor %mm3,%mm7 |
| 688 pxor 400(%esp,%edi,8),%mm6 |
| 689 xorb (%esp,%edi,1),%cl |
| 690 movb %dl,%al |
# Next 32-bit word of the (xored) input block.
| 691 movl 532(%esp),%edx |
| 692 movd %mm7,%ebx |
| 693 movzbl %cl,%ecx |
| 694 psrlq $8,%mm7 |
| 695 movq %mm6,%mm3 |
| 696 movl %eax,%edi |
| 697 psrlq $8,%mm6 |
| 698 pxor 272(%esp,%ebp,8),%mm7 |
| 699 andb $15,%al |
| 700 psllq $56,%mm3 |
| 701 pxor %mm1,%mm6 |
| 702 shrl $4,%edi |
| 703 pinsrw $2,(%esi,%ecx,2),%mm0 |
| 704 pxor 16(%esp,%eax,8),%mm7 |
| 705 roll $8,%edx |
| 706 pxor 144(%esp,%eax,8),%mm6 |
| 707 pxor %mm3,%mm7 |
| 708 pxor 400(%esp,%ebp,8),%mm6 |
| 709 xorb (%esp,%ebp,1),%bl |
| 710 movb %dl,%al |
| 711 movd %mm7,%ecx |
| 712 movzbl %bl,%ebx |
| 713 psrlq $8,%mm7 |
| 714 movq %mm6,%mm3 |
| 715 movl %eax,%ebp |
| 716 psrlq $8,%mm6 |
| 717 pxor 272(%esp,%edi,8),%mm7 |
| 718 andb $15,%al |
| 719 psllq $56,%mm3 |
| 720 pxor %mm0,%mm6 |
| 721 shrl $4,%ebp |
| 722 pinsrw $2,(%esi,%ebx,2),%mm2 |
| 723 pxor 16(%esp,%eax,8),%mm7 |
| 724 roll $8,%edx |
| 725 pxor 144(%esp,%eax,8),%mm6 |
| 726 pxor %mm3,%mm7 |
| 727 pxor 400(%esp,%edi,8),%mm6 |
| 728 xorb (%esp,%edi,1),%cl |
| 729 movb %dl,%al |
| 730 movd %mm7,%ebx |
| 731 movzbl %cl,%ecx |
| 732 psrlq $8,%mm7 |
| 733 movq %mm6,%mm3 |
| 734 movl %eax,%edi |
| 735 psrlq $8,%mm6 |
| 736 pxor 272(%esp,%ebp,8),%mm7 |
| 737 andb $15,%al |
| 738 psllq $56,%mm3 |
| 739 pxor %mm2,%mm6 |
| 740 shrl $4,%edi |
| 741 pinsrw $2,(%esi,%ecx,2),%mm1 |
| 742 pxor 16(%esp,%eax,8),%mm7 |
| 743 roll $8,%edx |
| 744 pxor 144(%esp,%eax,8),%mm6 |
| 745 pxor %mm3,%mm7 |
| 746 pxor 400(%esp,%ebp,8),%mm6 |
| 747 xorb (%esp,%ebp,1),%bl |
| 748 movb %dl,%al |
| 749 movd %mm7,%ecx |
| 750 movzbl %bl,%ebx |
| 751 psrlq $8,%mm7 |
| 752 movq %mm6,%mm3 |
| 753 movl %eax,%ebp |
| 754 psrlq $8,%mm6 |
| 755 pxor 272(%esp,%edi,8),%mm7 |
| 756 andb $15,%al |
| 757 psllq $56,%mm3 |
| 758 pxor %mm1,%mm6 |
| 759 shrl $4,%ebp |
| 760 pinsrw $2,(%esi,%ebx,2),%mm0 |
| 761 pxor 16(%esp,%eax,8),%mm7 |
| 762 roll $8,%edx |
| 763 pxor 144(%esp,%eax,8),%mm6 |
| 764 pxor %mm3,%mm7 |
| 765 pxor 400(%esp,%edi,8),%mm6 |
| 766 xorb (%esp,%edi,1),%cl |
| 767 movb %dl,%al |
# Third 32-bit word.
| 768 movl 528(%esp),%edx |
| 769 movd %mm7,%ebx |
| 770 movzbl %cl,%ecx |
| 771 psrlq $8,%mm7 |
| 772 movq %mm6,%mm3 |
| 773 movl %eax,%edi |
| 774 psrlq $8,%mm6 |
| 775 pxor 272(%esp,%ebp,8),%mm7 |
| 776 andb $15,%al |
| 777 psllq $56,%mm3 |
| 778 pxor %mm0,%mm6 |
| 779 shrl $4,%edi |
| 780 pinsrw $2,(%esi,%ecx,2),%mm2 |
| 781 pxor 16(%esp,%eax,8),%mm7 |
| 782 roll $8,%edx |
| 783 pxor 144(%esp,%eax,8),%mm6 |
| 784 pxor %mm3,%mm7 |
| 785 pxor 400(%esp,%ebp,8),%mm6 |
| 786 xorb (%esp,%ebp,1),%bl |
| 787 movb %dl,%al |
| 788 movd %mm7,%ecx |
| 789 movzbl %bl,%ebx |
| 790 psrlq $8,%mm7 |
| 791 movq %mm6,%mm3 |
| 792 movl %eax,%ebp |
| 793 psrlq $8,%mm6 |
| 794 pxor 272(%esp,%edi,8),%mm7 |
| 795 andb $15,%al |
| 796 psllq $56,%mm3 |
| 797 pxor %mm2,%mm6 |
| 798 shrl $4,%ebp |
| 799 pinsrw $2,(%esi,%ebx,2),%mm1 |
| 800 pxor 16(%esp,%eax,8),%mm7 |
| 801 roll $8,%edx |
| 802 pxor 144(%esp,%eax,8),%mm6 |
| 803 pxor %mm3,%mm7 |
| 804 pxor 400(%esp,%edi,8),%mm6 |
| 805 xorb (%esp,%edi,1),%cl |
| 806 movb %dl,%al |
| 807 movd %mm7,%ebx |
| 808 movzbl %cl,%ecx |
| 809 psrlq $8,%mm7 |
| 810 movq %mm6,%mm3 |
| 811 movl %eax,%edi |
| 812 psrlq $8,%mm6 |
| 813 pxor 272(%esp,%ebp,8),%mm7 |
| 814 andb $15,%al |
| 815 psllq $56,%mm3 |
| 816 pxor %mm1,%mm6 |
| 817 shrl $4,%edi |
| 818 pinsrw $2,(%esi,%ecx,2),%mm0 |
| 819 pxor 16(%esp,%eax,8),%mm7 |
| 820 roll $8,%edx |
| 821 pxor 144(%esp,%eax,8),%mm6 |
| 822 pxor %mm3,%mm7 |
| 823 pxor 400(%esp,%ebp,8),%mm6 |
| 824 xorb (%esp,%ebp,1),%bl |
| 825 movb %dl,%al |
| 826 movd %mm7,%ecx |
| 827 movzbl %bl,%ebx |
| 828 psrlq $8,%mm7 |
| 829 movq %mm6,%mm3 |
| 830 movl %eax,%ebp |
| 831 psrlq $8,%mm6 |
| 832 pxor 272(%esp,%edi,8),%mm7 |
| 833 andb $15,%al |
| 834 psllq $56,%mm3 |
| 835 pxor %mm0,%mm6 |
| 836 shrl $4,%ebp |
| 837 pinsrw $2,(%esi,%ebx,2),%mm2 |
| 838 pxor 16(%esp,%eax,8),%mm7 |
| 839 roll $8,%edx |
| 840 pxor 144(%esp,%eax,8),%mm6 |
| 841 pxor %mm3,%mm7 |
| 842 pxor 400(%esp,%edi,8),%mm6 |
| 843 xorb (%esp,%edi,1),%cl |
| 844 movb %dl,%al |
# Fourth (last) 32-bit word.
| 845 movl 524(%esp),%edx |
| 846 movd %mm7,%ebx |
| 847 movzbl %cl,%ecx |
| 848 psrlq $8,%mm7 |
| 849 movq %mm6,%mm3 |
| 850 movl %eax,%edi |
| 851 psrlq $8,%mm6 |
| 852 pxor 272(%esp,%ebp,8),%mm7 |
| 853 andb $15,%al |
| 854 psllq $56,%mm3 |
| 855 pxor %mm2,%mm6 |
| 856 shrl $4,%edi |
| 857 pinsrw $2,(%esi,%ecx,2),%mm1 |
| 858 pxor 16(%esp,%eax,8),%mm7 |
| 859 pxor 144(%esp,%eax,8),%mm6 |
| 860 xorb (%esp,%ebp,1),%bl |
| 861 pxor %mm3,%mm7 |
| 862 pxor 400(%esp,%ebp,8),%mm6 |
| 863 movzbl %bl,%ebx |
| 864 pxor %mm2,%mm2 |
| 865 psllq $4,%mm1 |
# Final 4-bit shift + Lrem_8bit fold, then merge accumulators.
| 866 movd %mm7,%ecx |
| 867 psrlq $4,%mm7 |
| 868 movq %mm6,%mm3 |
| 869 psrlq $4,%mm6 |
| 870 shll $4,%ecx |
| 871 pxor 16(%esp,%edi,8),%mm7 |
| 872 psllq $60,%mm3 |
| 873 movzbl %cl,%ecx |
| 874 pxor %mm3,%mm7 |
| 875 pxor 144(%esp,%edi,8),%mm6 |
| 876 pinsrw $2,(%esi,%ebx,2),%mm0 |
| 877 pxor %mm1,%mm6 |
| 878 movd %mm7,%edx |
| 879 pinsrw $3,(%esi,%ecx,2),%mm2 |
| 880 psllq $12,%mm0 |
| 881 pxor %mm0,%mm6 |
| 882 psrlq $32,%mm7 |
| 883 pxor %mm2,%mm6 |
| 884 movl 548(%esp),%ecx |
| 885 movd %mm7,%ebx |
# Byte-swap the 64-bit half in mm6 (psllw/psrlw/por + pshufw) and the
# 32-bit words in edx/ebx; loop while input pointer != end pointer.
| 886 movq %mm6,%mm3 |
| 887 psllw $8,%mm6 |
| 888 psrlw $8,%mm3 |
| 889 por %mm3,%mm6 |
| 890 bswap %edx |
| 891 pshufw $27,%mm6,%mm6 |
| 892 bswap %ebx |
| 893 cmpl 552(%esp),%ecx |
| 894 jne L009outer |
# Store Xi, restore the caller's %esp saved at 556(%esp), clear MMX state.
| 895 movl 544(%esp),%eax |
| 896 movl %edx,12(%eax) |
| 897 movl %ebx,8(%eax) |
| 898 movq %mm6,(%eax) |
| 899 movl 556(%esp),%esp |
| 900 emms |
| 901 popl %edi |
| 902 popl %esi |
| 903 popl %ebx |
| 904 popl %ebp |
| 905 ret |
# ---------------------------------------------------------------------------
# void gcm_init_clmul(u128 Htable[16], const u64 Xi[2])
# PCLMULQDQ key schedule: derives H (shifted left by 1 with polynomial
# correction, using the 0xc2... constant at Lbswap+16) and H^2, storing
# H, H^2 and their Karatsuba helper (hi^lo halves) at (%edx), 16(%edx)
# and 32(%edx).
# The .byte sequences are hand-encoded SSE4.1/PCLMUL opcodes for old
# assemblers: 102,15,58,68 = pclmulqdq; 102,15,58,15 = palignr.
# NOTE(review): machine-generated (CRYPTOGAMS) — regenerate, don't hand-edit.
# ---------------------------------------------------------------------------
| 906 .globl _gcm_init_clmul |
| 907 .private_extern _gcm_init_clmul |
| 908 .align 4 |
| 909 _gcm_init_clmul: |
| 910 L_gcm_init_clmul_begin: |
| 911 movl 4(%esp),%edx |
| 912 movl 8(%esp),%eax |
# PIC address of the Lbswap constant block.
| 913 call L010pic |
| 914 L010pic: |
| 915 popl %ecx |
| 916 leal Lbswap-L010pic(%ecx),%ecx |
# H <<= 1 (128-bit), conditionally xor the reduction constant when the
# top bit was set (pcmpgtd mask + pand 16(%ecx)).
| 917 movdqu (%eax),%xmm2 |
| 918 pshufd $78,%xmm2,%xmm2 |
| 919 pshufd $255,%xmm2,%xmm4 |
| 920 movdqa %xmm2,%xmm3 |
| 921 psllq $1,%xmm2 |
| 922 pxor %xmm5,%xmm5 |
| 923 psrlq $63,%xmm3 |
| 924 pcmpgtd %xmm4,%xmm5 |
| 925 pslldq $8,%xmm3 |
| 926 por %xmm3,%xmm2 |
| 927 pand 16(%ecx),%xmm5 |
| 928 pxor %xmm5,%xmm2 |
# Compute H^2 = H*H: carry-less multiply (Karatsuba split via pshufd $78)
# followed by the standard 128->128 polynomial reduction.
| 929 movdqa %xmm2,%xmm0 |
| 930 movdqa %xmm0,%xmm1 |
| 931 pshufd $78,%xmm0,%xmm3 |
| 932 pshufd $78,%xmm2,%xmm4 |
| 933 pxor %xmm0,%xmm3 |
| 934 pxor %xmm2,%xmm4 |
| 935 .byte 102,15,58,68,194,0 |
| 936 .byte 102,15,58,68,202,17 |
| 937 .byte 102,15,58,68,220,0 |
| 938 xorps %xmm0,%xmm3 |
| 939 xorps %xmm1,%xmm3 |
| 940 movdqa %xmm3,%xmm4 |
| 941 psrldq $8,%xmm3 |
| 942 pslldq $8,%xmm4 |
| 943 pxor %xmm3,%xmm1 |
| 944 pxor %xmm4,%xmm0 |
| 945 movdqa %xmm0,%xmm4 |
| 946 movdqa %xmm0,%xmm3 |
| 947 psllq $5,%xmm0 |
| 948 pxor %xmm0,%xmm3 |
| 949 psllq $1,%xmm0 |
| 950 pxor %xmm3,%xmm0 |
| 951 psllq $57,%xmm0 |
| 952 movdqa %xmm0,%xmm3 |
| 953 pslldq $8,%xmm0 |
| 954 psrldq $8,%xmm3 |
| 955 pxor %xmm4,%xmm0 |
| 956 pxor %xmm3,%xmm1 |
| 957 movdqa %xmm0,%xmm4 |
| 958 psrlq $1,%xmm0 |
| 959 pxor %xmm4,%xmm1 |
| 960 pxor %xmm0,%xmm4 |
| 961 psrlq $5,%xmm0 |
| 962 pxor %xmm4,%xmm0 |
| 963 psrlq $1,%xmm0 |
| 964 pxor %xmm1,%xmm0 |
# Store H, H^2, and the packed (hi^lo) halves for Karatsuba (palignr $8).
| 965 pshufd $78,%xmm2,%xmm3 |
| 966 pshufd $78,%xmm0,%xmm4 |
| 967 pxor %xmm2,%xmm3 |
| 968 movdqu %xmm2,(%edx) |
| 969 pxor %xmm0,%xmm4 |
| 970 movdqu %xmm0,16(%edx) |
| 971 .byte 102,15,58,15,227,8 |
| 972 movdqu %xmm4,32(%edx) |
| 973 ret |
# ---------------------------------------------------------------------------
# void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16])
# GHASH single-block multiply using PCLMULQDQ.
# .byte 102,15,56,0 = pshufb (byte-swap via the Lbswap mask);
# .byte 102,15,58,68 = pclmulqdq.
# NOTE(review): machine-generated (CRYPTOGAMS) — regenerate, don't hand-edit.
# In: 4(%esp)=Xi, 8(%esp)=Htable (H at (%edx), Karatsuba halves at 32(%edx)).
# ---------------------------------------------------------------------------
| 974 .globl _gcm_gmult_clmul |
| 975 .private_extern _gcm_gmult_clmul |
| 976 .align 4 |
| 977 _gcm_gmult_clmul: |
| 978 L_gcm_gmult_clmul_begin: |
| 979 movl 4(%esp),%eax |
| 980 movl 8(%esp),%edx |
| 981 call L011pic |
| 982 L011pic: |
| 983 popl %ecx |
| 984 leal Lbswap-L011pic(%ecx),%ecx |
# Load Xi, byte-swap with the Lbswap mask in xmm5, load H and helper.
| 985 movdqu (%eax),%xmm0 |
| 986 movdqa (%ecx),%xmm5 |
| 987 movups (%edx),%xmm2 |
| 988 .byte 102,15,56,0,197 |
| 989 movups 32(%edx),%xmm4 |
# Karatsuba carry-less multiply: lo, hi, middle products.
| 990 movdqa %xmm0,%xmm1 |
| 991 pshufd $78,%xmm0,%xmm3 |
| 992 pxor %xmm0,%xmm3 |
| 993 .byte 102,15,58,68,194,0 |
| 994 .byte 102,15,58,68,202,17 |
| 995 .byte 102,15,58,68,220,0 |
| 996 xorps %xmm0,%xmm3 |
| 997 xorps %xmm1,%xmm3 |
| 998 movdqa %xmm3,%xmm4 |
| 999 psrldq $8,%xmm3 |
| 1000 pslldq $8,%xmm4 |
| 1001 pxor %xmm3,%xmm1 |
| 1002 pxor %xmm4,%xmm0 |
# 256->128 bit polynomial reduction (shift-left phase then shift-right).
| 1003 movdqa %xmm0,%xmm4 |
| 1004 movdqa %xmm0,%xmm3 |
| 1005 psllq $5,%xmm0 |
| 1006 pxor %xmm0,%xmm3 |
| 1007 psllq $1,%xmm0 |
| 1008 pxor %xmm3,%xmm0 |
| 1009 psllq $57,%xmm0 |
| 1010 movdqa %xmm0,%xmm3 |
| 1011 pslldq $8,%xmm0 |
| 1012 psrldq $8,%xmm3 |
| 1013 pxor %xmm4,%xmm0 |
| 1014 pxor %xmm3,%xmm1 |
| 1015 movdqa %xmm0,%xmm4 |
| 1016 psrlq $1,%xmm0 |
| 1017 pxor %xmm4,%xmm1 |
| 1018 pxor %xmm0,%xmm4 |
| 1019 psrlq $5,%xmm0 |
| 1020 pxor %xmm4,%xmm0 |
| 1021 psrlq $1,%xmm0 |
| 1022 pxor %xmm1,%xmm0 |
# Byte-swap back and store Xi.
| 1023 .byte 102,15,56,0,197 |
| 1024 movdqu %xmm0,(%eax) |
| 1025 ret |
# ---------------------------------------------------------------------------
# void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16],
#                      const u8 *inp, size_t len)
# GHASH over a buffer with PCLMULQDQ, processing two 16-byte blocks per
# main-loop iteration (uses H at (%edx) and H^2 at 16(%edx) so the two
# blocks can be combined in one reduction).  Odd trailing block handled
# at L013odd_tail, pairs with no remainder at L014even_tail.
# .byte 102,15,56,0 = pshufb; 102,15,58,68 = pclmulqdq.
# NOTE(review): machine-generated (CRYPTOGAMS) — regenerate, don't hand-edit.
# In (after 4 pushes): 20(%esp)=Xi, 24(%esp)=Htable, 28(%esp)=inp,
#     32(%esp)=len (must be a multiple of 16; counted down in %ebx).
# ---------------------------------------------------------------------------
| 1026 .globl _gcm_ghash_clmul |
| 1027 .private_extern _gcm_ghash_clmul |
| 1028 .align 4 |
| 1029 _gcm_ghash_clmul: |
| 1030 L_gcm_ghash_clmul_begin: |
| 1031 pushl %ebp |
| 1032 pushl %ebx |
| 1033 pushl %esi |
| 1034 pushl %edi |
| 1035 movl 20(%esp),%eax |
| 1036 movl 24(%esp),%edx |
| 1037 movl 28(%esp),%esi |
| 1038 movl 32(%esp),%ebx |
| 1039 call L012pic |
| 1040 L012pic: |
| 1041 popl %ecx |
| 1042 leal Lbswap-L012pic(%ecx),%ecx |
| 1043 movdqu (%eax),%xmm0 |
| 1044 movdqa (%ecx),%xmm5 |
| 1045 movdqu (%edx),%xmm2 |
| 1046 .byte 102,15,56,0,197 |
# Single block only? Skip straight to the odd-tail path.
| 1047 subl $16,%ebx |
| 1048 jz L013odd_tail |
# Prime the two-block pipeline: Xi ^= blk0; start multiplying blk1 by H.
| 1049 movdqu (%esi),%xmm3 |
| 1050 movdqu 16(%esi),%xmm6 |
| 1051 .byte 102,15,56,0,221 |
| 1052 .byte 102,15,56,0,245 |
| 1053 movdqu 32(%edx),%xmm5 |
| 1054 pxor %xmm3,%xmm0 |
| 1055 pshufd $78,%xmm6,%xmm3 |
| 1056 movdqa %xmm6,%xmm7 |
| 1057 pxor %xmm6,%xmm3 |
| 1058 leal 32(%esi),%esi |
| 1059 .byte 102,15,58,68,242,0 |
| 1060 .byte 102,15,58,68,250,17 |
| 1061 .byte 102,15,58,68,221,0 |
| 1062 movups 16(%edx),%xmm2 |
| 1063 nop |
| 1064 subl $32,%ebx |
| 1065 jbe L014even_tail |
| 1066 jmp L015mod_loop |
| 1067 .align 5,0x90 |
# Main loop: (Xi^blk0)*H^2 xor blk1*H, reduced once; reduction is
# interleaved with loading/multiplying the next pair of blocks.
| 1068 L015mod_loop: |
| 1069 pshufd $78,%xmm0,%xmm4 |
| 1070 movdqa %xmm0,%xmm1 |
| 1071 pxor %xmm0,%xmm4 |
| 1072 nop |
| 1073 .byte 102,15,58,68,194,0 |
| 1074 .byte 102,15,58,68,202,17 |
| 1075 .byte 102,15,58,68,229,16 |
| 1076 movups (%edx),%xmm2 |
| 1077 xorps %xmm6,%xmm0 |
| 1078 movdqa (%ecx),%xmm5 |
| 1079 xorps %xmm7,%xmm1 |
| 1080 movdqu (%esi),%xmm7 |
| 1081 pxor %xmm0,%xmm3 |
| 1082 movdqu 16(%esi),%xmm6 |
| 1083 pxor %xmm1,%xmm3 |
| 1084 .byte 102,15,56,0,253 |
| 1085 pxor %xmm3,%xmm4 |
| 1086 movdqa %xmm4,%xmm3 |
| 1087 psrldq $8,%xmm4 |
| 1088 pslldq $8,%xmm3 |
| 1089 pxor %xmm4,%xmm1 |
| 1090 pxor %xmm3,%xmm0 |
| 1091 .byte 102,15,56,0,245 |
| 1092 pxor %xmm7,%xmm1 |
| 1093 movdqa %xmm6,%xmm7 |
| 1094 movdqa %xmm0,%xmm4 |
| 1095 movdqa %xmm0,%xmm3 |
| 1096 psllq $5,%xmm0 |
| 1097 pxor %xmm0,%xmm3 |
| 1098 psllq $1,%xmm0 |
| 1099 pxor %xmm3,%xmm0 |
| 1100 .byte 102,15,58,68,242,0 |
| 1101 movups 32(%edx),%xmm5 |
| 1102 psllq $57,%xmm0 |
| 1103 movdqa %xmm0,%xmm3 |
| 1104 pslldq $8,%xmm0 |
| 1105 psrldq $8,%xmm3 |
| 1106 pxor %xmm4,%xmm0 |
| 1107 pxor %xmm3,%xmm1 |
| 1108 pshufd $78,%xmm7,%xmm3 |
| 1109 movdqa %xmm0,%xmm4 |
| 1110 psrlq $1,%xmm0 |
| 1111 pxor %xmm7,%xmm3 |
| 1112 pxor %xmm4,%xmm1 |
| 1113 .byte 102,15,58,68,250,17 |
| 1114 movups 16(%edx),%xmm2 |
| 1115 pxor %xmm0,%xmm4 |
| 1116 psrlq $5,%xmm0 |
| 1117 pxor %xmm4,%xmm0 |
| 1118 psrlq $1,%xmm0 |
| 1119 pxor %xmm1,%xmm0 |
| 1120 .byte 102,15,58,68,221,0 |
| 1121 leal 32(%esi),%esi |
| 1122 subl $32,%ebx |
| 1123 ja L015mod_loop |
# Even tail: finish the final in-flight pair (no more input to prefetch).
| 1124 L014even_tail: |
| 1125 pshufd $78,%xmm0,%xmm4 |
| 1126 movdqa %xmm0,%xmm1 |
| 1127 pxor %xmm0,%xmm4 |
| 1128 .byte 102,15,58,68,194,0 |
| 1129 .byte 102,15,58,68,202,17 |
| 1130 .byte 102,15,58,68,229,16 |
| 1131 movdqa (%ecx),%xmm5 |
| 1132 xorps %xmm6,%xmm0 |
| 1133 xorps %xmm7,%xmm1 |
| 1134 pxor %xmm0,%xmm3 |
| 1135 pxor %xmm1,%xmm3 |
| 1136 pxor %xmm3,%xmm4 |
| 1137 movdqa %xmm4,%xmm3 |
| 1138 psrldq $8,%xmm4 |
| 1139 pslldq $8,%xmm3 |
| 1140 pxor %xmm4,%xmm1 |
| 1141 pxor %xmm3,%xmm0 |
| 1142 movdqa %xmm0,%xmm4 |
| 1143 movdqa %xmm0,%xmm3 |
| 1144 psllq $5,%xmm0 |
| 1145 pxor %xmm0,%xmm3 |
| 1146 psllq $1,%xmm0 |
| 1147 pxor %xmm3,%xmm0 |
| 1148 psllq $57,%xmm0 |
| 1149 movdqa %xmm0,%xmm3 |
| 1150 pslldq $8,%xmm0 |
| 1151 psrldq $8,%xmm3 |
| 1152 pxor %xmm4,%xmm0 |
| 1153 pxor %xmm3,%xmm1 |
| 1154 movdqa %xmm0,%xmm4 |
| 1155 psrlq $1,%xmm0 |
| 1156 pxor %xmm4,%xmm1 |
| 1157 pxor %xmm0,%xmm4 |
| 1158 psrlq $5,%xmm0 |
| 1159 pxor %xmm4,%xmm0 |
| 1160 psrlq $1,%xmm0 |
| 1161 pxor %xmm1,%xmm0 |
| 1162 testl %ebx,%ebx |
| 1163 jnz L016done |
| 1164 movups (%edx),%xmm2 |
# Odd tail: one remaining block, multiplied by H alone.
| 1165 L013odd_tail: |
| 1166 movdqu (%esi),%xmm3 |
| 1167 .byte 102,15,56,0,221 |
| 1168 pxor %xmm3,%xmm0 |
| 1169 movdqa %xmm0,%xmm1 |
| 1170 pshufd $78,%xmm0,%xmm3 |
| 1171 pshufd $78,%xmm2,%xmm4 |
| 1172 pxor %xmm0,%xmm3 |
| 1173 pxor %xmm2,%xmm4 |
| 1174 .byte 102,15,58,68,194,0 |
| 1175 .byte 102,15,58,68,202,17 |
| 1176 .byte 102,15,58,68,220,0 |
| 1177 xorps %xmm0,%xmm3 |
| 1178 xorps %xmm1,%xmm3 |
| 1179 movdqa %xmm3,%xmm4 |
| 1180 psrldq $8,%xmm3 |
| 1181 pslldq $8,%xmm4 |
| 1182 pxor %xmm3,%xmm1 |
| 1183 pxor %xmm4,%xmm0 |
| 1184 movdqa %xmm0,%xmm4 |
| 1185 movdqa %xmm0,%xmm3 |
| 1186 psllq $5,%xmm0 |
| 1187 pxor %xmm0,%xmm3 |
| 1188 psllq $1,%xmm0 |
| 1189 pxor %xmm3,%xmm0 |
| 1190 psllq $57,%xmm0 |
| 1191 movdqa %xmm0,%xmm3 |
| 1192 pslldq $8,%xmm0 |
| 1193 psrldq $8,%xmm3 |
| 1194 pxor %xmm4,%xmm0 |
| 1195 pxor %xmm3,%xmm1 |
| 1196 movdqa %xmm0,%xmm4 |
| 1197 psrlq $1,%xmm0 |
| 1198 pxor %xmm4,%xmm1 |
| 1199 pxor %xmm0,%xmm4 |
| 1200 psrlq $5,%xmm0 |
| 1201 pxor %xmm4,%xmm0 |
| 1202 psrlq $1,%xmm0 |
| 1203 pxor %xmm1,%xmm0 |
| 1204 L016done: |
# Byte-swap back and store Xi.
| 1205 .byte 102,15,56,0,197 |
| 1206 movdqu %xmm0,(%eax) |
| 1207 popl %edi |
| 1208 popl %esi |
| 1209 popl %ebx |
| 1210 popl %ebp |
| 1211 ret |
# ---------------------------------------------------------------------------
# Constant data (all referenced PC-relatively by the routines above):
#  Lbswap     — 16-byte pshufb mask (15..0) that reverses byte order,
#               followed by the GCM reduction constant (high byte 0xc2)
#               used via pand 16(%ecx) in gcm_init_clmul.
#  Lrem_8bit  — 256 x 16-bit remainder table for the MMX 8-bit method.
#  Lrem_4bit  — 16 x 64-bit remainder table for the 4-bit methods (the
#               same 32-bit constants the x86 routines build on the stack).
#  Trailing .byte run — ASCII banner: "GHASH for x86, CRYPTOGAMS by
#  <appro@openssl.org>", NUL-terminated.
# ---------------------------------------------------------------------------
| 1212 .align 6,0x90 |
| 1213 Lbswap: |
| 1214 .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 |
| 1215 .byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194 |
| 1216 .align 6,0x90 |
| 1217 Lrem_8bit: |
| 1218 .value 0,450,900,582,1800,1738,1164,1358 |
| 1219 .value 3600,4050,3476,3158,2328,2266,2716,2910 |
| 1220 .value 7200,7650,8100,7782,6952,6890,6316,6510 |
| 1221 .value 4656,5106,4532,4214,5432,5370,5820,6014 |
| 1222 .value 14400,14722,15300,14854,16200,16010,15564,15630 |
| 1223 .value 13904,14226,13780,13334,12632,12442,13020,13086 |
| 1224 .value 9312,9634,10212,9766,9064,8874,8428,8494 |
| 1225 .value 10864,11186,10740,10294,11640,11450,12028,12094 |
| 1226 .value 28800,28994,29444,29382,30600,30282,29708,30158 |
| 1227 .value 32400,32594,32020,31958,31128,30810,31260,31710 |
| 1228 .value 27808,28002,28452,28390,27560,27242,26668,27118 |
| 1229 .value 25264,25458,24884,24822,26040,25722,26172,26622 |
| 1230 .value 18624,18690,19268,19078,20424,19978,19532,19854 |
| 1231 .value 18128,18194,17748,17558,16856,16410,16988,17310 |
| 1232 .value 21728,21794,22372,22182,21480,21034,20588,20910 |
| 1233 .value 23280,23346,22900,22710,24056,23610,24188,24510 |
| 1234 .value 57600,57538,57988,58182,58888,59338,58764,58446 |
| 1235 .value 61200,61138,60564,60758,59416,59866,60316,59998 |
| 1236 .value 64800,64738,65188,65382,64040,64490,63916,63598 |
| 1237 .value 62256,62194,61620,61814,62520,62970,63420,63102 |
| 1238 .value 55616,55426,56004,56070,56904,57226,56780,56334 |
| 1239 .value 55120,54930,54484,54550,53336,53658,54236,53790 |
| 1240 .value 50528,50338,50916,50982,49768,50090,49644,49198 |
| 1241 .value 52080,51890,51444,51510,52344,52666,53244,52798 |
| 1242 .value 37248,36930,37380,37830,38536,38730,38156,38094 |
| 1243 .value 40848,40530,39956,40406,39064,39258,39708,39646 |
| 1244 .value 36256,35938,36388,36838,35496,35690,35116,35054 |
| 1245 .value 33712,33394,32820,33270,33976,34170,34620,34558 |
| 1246 .value 43456,43010,43588,43910,44744,44810,44364,44174 |
| 1247 .value 42960,42514,42068,42390,41176,41242,41820,41630 |
| 1248 .value 46560,46114,46692,47014,45800,45866,45420,45230 |
| 1249 .value 48112,47666,47220,47542,48376,48442,49020,48830 |
| 1250 .align 6,0x90 |
| 1251 Lrem_4bit: |
| 1252 .long 0,0,0,471859200,0,943718400,0,610271232 |
| 1253 .long 0,1887436800,0,1822425088,0,1220542464,0,1423966208 |
| 1254 .long 0,3774873600,0,4246732800,0,3644850176,0,3311403008 |
| 1255 .long 0,2441084928,0,2376073216,0,2847932416,0,3051356160 |
| 1256 .byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67 |
| 1257 .byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112 |
| 1258 .byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62 |
| 1259 .byte 0 |
| 1260 #endif |
OLD | NEW |