OLD | NEW |
(Empty) | |
| 1 #if defined(__i386__) |
| 2 .file "ghash-x86.S" |
| 3 .text |
| 4 .globl _gcm_gmult_4bit_x86 |
| 5 .align 4 |
| 6 _gcm_gmult_4bit_x86: |
| 7 L_gcm_gmult_4bit_x86_begin: |
| 8 pushl %ebp |
| 9 pushl %ebx |
| 10 pushl %esi |
| 11 pushl %edi |
| 12 subl $84,%esp |
| 13 movl 104(%esp),%edi |
| 14 movl 108(%esp),%esi |
| 15 movl (%edi),%ebp |
| 16 movl 4(%edi),%edx |
| 17 movl 8(%edi),%ecx |
| 18 movl 12(%edi),%ebx |
| 19 movl $0,16(%esp) |
| 20 movl $471859200,20(%esp) |
| 21 movl $943718400,24(%esp) |
| 22 movl $610271232,28(%esp) |
| 23 movl $1887436800,32(%esp) |
| 24 movl $1822425088,36(%esp) |
| 25 movl $1220542464,40(%esp) |
| 26 movl $1423966208,44(%esp) |
| 27 movl $3774873600,48(%esp) |
| 28 movl $4246732800,52(%esp) |
| 29 movl $3644850176,56(%esp) |
| 30 movl $3311403008,60(%esp) |
| 31 movl $2441084928,64(%esp) |
| 32 movl $2376073216,68(%esp) |
| 33 movl $2847932416,72(%esp) |
| 34 movl $3051356160,76(%esp) |
| 35 movl %ebp,(%esp) |
| 36 movl %edx,4(%esp) |
| 37 movl %ecx,8(%esp) |
| 38 movl %ebx,12(%esp) |
| 39 shrl $20,%ebx |
| 40 andl $240,%ebx |
| 41 movl 4(%esi,%ebx,1),%ebp |
| 42 movl (%esi,%ebx,1),%edx |
| 43 movl 12(%esi,%ebx,1),%ecx |
| 44 movl 8(%esi,%ebx,1),%ebx |
| 45 xorl %eax,%eax |
| 46 movl $15,%edi |
| 47 jmp L000x86_loop |
| 48 .align 4,0x90 |
| 49 L000x86_loop: |
| 50 movb %bl,%al |
| 51 shrdl $4,%ecx,%ebx |
| 52 andb $15,%al |
| 53 shrdl $4,%edx,%ecx |
| 54 shrdl $4,%ebp,%edx |
| 55 shrl $4,%ebp |
| 56 xorl 16(%esp,%eax,4),%ebp |
| 57 movb (%esp,%edi,1),%al |
| 58 andb $240,%al |
| 59 xorl 8(%esi,%eax,1),%ebx |
| 60 xorl 12(%esi,%eax,1),%ecx |
| 61 xorl (%esi,%eax,1),%edx |
| 62 xorl 4(%esi,%eax,1),%ebp |
| 63 decl %edi |
| 64 js L001x86_break |
| 65 movb %bl,%al |
| 66 shrdl $4,%ecx,%ebx |
| 67 andb $15,%al |
| 68 shrdl $4,%edx,%ecx |
| 69 shrdl $4,%ebp,%edx |
| 70 shrl $4,%ebp |
| 71 xorl 16(%esp,%eax,4),%ebp |
| 72 movb (%esp,%edi,1),%al |
| 73 shlb $4,%al |
| 74 xorl 8(%esi,%eax,1),%ebx |
| 75 xorl 12(%esi,%eax,1),%ecx |
| 76 xorl (%esi,%eax,1),%edx |
| 77 xorl 4(%esi,%eax,1),%ebp |
| 78 jmp L000x86_loop |
| 79 .align 4,0x90 |
| 80 L001x86_break: |
| 81 bswap %ebx |
| 82 bswap %ecx |
| 83 bswap %edx |
| 84 bswap %ebp |
| 85 movl 104(%esp),%edi |
| 86 movl %ebx,12(%edi) |
| 87 movl %ecx,8(%edi) |
| 88 movl %edx,4(%edi) |
| 89 movl %ebp,(%edi) |
| 90 addl $84,%esp |
| 91 popl %edi |
| 92 popl %esi |
| 93 popl %ebx |
| 94 popl %ebp |
| 95 ret |
| 96 .globl _gcm_ghash_4bit_x86 |
| 97 .align 4 |
| 98 _gcm_ghash_4bit_x86: |
| 99 L_gcm_ghash_4bit_x86_begin: |
| 100 pushl %ebp |
| 101 pushl %ebx |
| 102 pushl %esi |
| 103 pushl %edi |
| 104 subl $84,%esp |
| 105 movl 104(%esp),%ebx |
| 106 movl 108(%esp),%esi |
| 107 movl 112(%esp),%edi |
| 108 movl 116(%esp),%ecx |
| 109 addl %edi,%ecx |
| 110 movl %ecx,116(%esp) |
| 111 movl (%ebx),%ebp |
| 112 movl 4(%ebx),%edx |
| 113 movl 8(%ebx),%ecx |
| 114 movl 12(%ebx),%ebx |
| 115 movl $0,16(%esp) |
| 116 movl $471859200,20(%esp) |
| 117 movl $943718400,24(%esp) |
| 118 movl $610271232,28(%esp) |
| 119 movl $1887436800,32(%esp) |
| 120 movl $1822425088,36(%esp) |
| 121 movl $1220542464,40(%esp) |
| 122 movl $1423966208,44(%esp) |
| 123 movl $3774873600,48(%esp) |
| 124 movl $4246732800,52(%esp) |
| 125 movl $3644850176,56(%esp) |
| 126 movl $3311403008,60(%esp) |
| 127 movl $2441084928,64(%esp) |
| 128 movl $2376073216,68(%esp) |
| 129 movl $2847932416,72(%esp) |
| 130 movl $3051356160,76(%esp) |
| 131 .align 4,0x90 |
| 132 L002x86_outer_loop: |
| 133 xorl 12(%edi),%ebx |
| 134 xorl 8(%edi),%ecx |
| 135 xorl 4(%edi),%edx |
| 136 xorl (%edi),%ebp |
| 137 movl %ebx,12(%esp) |
| 138 movl %ecx,8(%esp) |
| 139 movl %edx,4(%esp) |
| 140 movl %ebp,(%esp) |
| 141 shrl $20,%ebx |
| 142 andl $240,%ebx |
| 143 movl 4(%esi,%ebx,1),%ebp |
| 144 movl (%esi,%ebx,1),%edx |
| 145 movl 12(%esi,%ebx,1),%ecx |
| 146 movl 8(%esi,%ebx,1),%ebx |
| 147 xorl %eax,%eax |
| 148 movl $15,%edi |
| 149 jmp L003x86_loop |
| 150 .align 4,0x90 |
| 151 L003x86_loop: |
| 152 movb %bl,%al |
| 153 shrdl $4,%ecx,%ebx |
| 154 andb $15,%al |
| 155 shrdl $4,%edx,%ecx |
| 156 shrdl $4,%ebp,%edx |
| 157 shrl $4,%ebp |
| 158 xorl 16(%esp,%eax,4),%ebp |
| 159 movb (%esp,%edi,1),%al |
| 160 andb $240,%al |
| 161 xorl 8(%esi,%eax,1),%ebx |
| 162 xorl 12(%esi,%eax,1),%ecx |
| 163 xorl (%esi,%eax,1),%edx |
| 164 xorl 4(%esi,%eax,1),%ebp |
| 165 decl %edi |
| 166 js L004x86_break |
| 167 movb %bl,%al |
| 168 shrdl $4,%ecx,%ebx |
| 169 andb $15,%al |
| 170 shrdl $4,%edx,%ecx |
| 171 shrdl $4,%ebp,%edx |
| 172 shrl $4,%ebp |
| 173 xorl 16(%esp,%eax,4),%ebp |
| 174 movb (%esp,%edi,1),%al |
| 175 shlb $4,%al |
| 176 xorl 8(%esi,%eax,1),%ebx |
| 177 xorl 12(%esi,%eax,1),%ecx |
| 178 xorl (%esi,%eax,1),%edx |
| 179 xorl 4(%esi,%eax,1),%ebp |
| 180 jmp L003x86_loop |
| 181 .align 4,0x90 |
| 182 L004x86_break: |
| 183 bswap %ebx |
| 184 bswap %ecx |
| 185 bswap %edx |
| 186 bswap %ebp |
| 187 movl 112(%esp),%edi |
| 188 leal 16(%edi),%edi |
| 189 cmpl 116(%esp),%edi |
| 190 movl %edi,112(%esp) |
| 191 jb L002x86_outer_loop |
| 192 movl 104(%esp),%edi |
| 193 movl %ebx,12(%edi) |
| 194 movl %ecx,8(%edi) |
| 195 movl %edx,4(%edi) |
| 196 movl %ebp,(%edi) |
| 197 addl $84,%esp |
| 198 popl %edi |
| 199 popl %esi |
| 200 popl %ebx |
| 201 popl %ebp |
| 202 ret |
| 203 .globl _gcm_gmult_4bit_mmx |
| 204 .align 4 |
| 205 _gcm_gmult_4bit_mmx: |
| 206 L_gcm_gmult_4bit_mmx_begin: |
| 207 pushl %ebp |
| 208 pushl %ebx |
| 209 pushl %esi |
| 210 pushl %edi |
| 211 movl 20(%esp),%edi |
| 212 movl 24(%esp),%esi |
| 213 call L005pic_point |
| 214 L005pic_point: |
| 215 popl %eax |
| 216 leal Lrem_4bit-L005pic_point(%eax),%eax |
| 217 movzbl 15(%edi),%ebx |
| 218 xorl %ecx,%ecx |
| 219 movl %ebx,%edx |
| 220 movb %dl,%cl |
| 221 movl $14,%ebp |
| 222 shlb $4,%cl |
| 223 andl $240,%edx |
| 224 movq 8(%esi,%ecx,1),%mm0 |
| 225 movq (%esi,%ecx,1),%mm1 |
| 226 movd %mm0,%ebx |
| 227 jmp L006mmx_loop |
| 228 .align 4,0x90 |
| 229 L006mmx_loop: |
| 230 psrlq $4,%mm0 |
| 231 andl $15,%ebx |
| 232 movq %mm1,%mm2 |
| 233 psrlq $4,%mm1 |
| 234 pxor 8(%esi,%edx,1),%mm0 |
| 235 movb (%edi,%ebp,1),%cl |
| 236 psllq $60,%mm2 |
| 237 pxor (%eax,%ebx,8),%mm1 |
| 238 decl %ebp |
| 239 movd %mm0,%ebx |
| 240 pxor (%esi,%edx,1),%mm1 |
| 241 movl %ecx,%edx |
| 242 pxor %mm2,%mm0 |
| 243 js L007mmx_break |
| 244 shlb $4,%cl |
| 245 andl $15,%ebx |
| 246 psrlq $4,%mm0 |
| 247 andl $240,%edx |
| 248 movq %mm1,%mm2 |
| 249 psrlq $4,%mm1 |
| 250 pxor 8(%esi,%ecx,1),%mm0 |
| 251 psllq $60,%mm2 |
| 252 pxor (%eax,%ebx,8),%mm1 |
| 253 movd %mm0,%ebx |
| 254 pxor (%esi,%ecx,1),%mm1 |
| 255 pxor %mm2,%mm0 |
| 256 jmp L006mmx_loop |
| 257 .align 4,0x90 |
| 258 L007mmx_break: |
| 259 shlb $4,%cl |
| 260 andl $15,%ebx |
| 261 psrlq $4,%mm0 |
| 262 andl $240,%edx |
| 263 movq %mm1,%mm2 |
| 264 psrlq $4,%mm1 |
| 265 pxor 8(%esi,%ecx,1),%mm0 |
| 266 psllq $60,%mm2 |
| 267 pxor (%eax,%ebx,8),%mm1 |
| 268 movd %mm0,%ebx |
| 269 pxor (%esi,%ecx,1),%mm1 |
| 270 pxor %mm2,%mm0 |
| 271 psrlq $4,%mm0 |
| 272 andl $15,%ebx |
| 273 movq %mm1,%mm2 |
| 274 psrlq $4,%mm1 |
| 275 pxor 8(%esi,%edx,1),%mm0 |
| 276 psllq $60,%mm2 |
| 277 pxor (%eax,%ebx,8),%mm1 |
| 278 movd %mm0,%ebx |
| 279 pxor (%esi,%edx,1),%mm1 |
| 280 pxor %mm2,%mm0 |
| 281 psrlq $32,%mm0 |
| 282 movd %mm1,%edx |
| 283 psrlq $32,%mm1 |
| 284 movd %mm0,%ecx |
| 285 movd %mm1,%ebp |
| 286 bswap %ebx |
| 287 bswap %edx |
| 288 bswap %ecx |
| 289 bswap %ebp |
| 290 emms |
| 291 movl %ebx,12(%edi) |
| 292 movl %edx,4(%edi) |
| 293 movl %ecx,8(%edi) |
| 294 movl %ebp,(%edi) |
| 295 popl %edi |
| 296 popl %esi |
| 297 popl %ebx |
| 298 popl %ebp |
| 299 ret |
| 300 .globl _gcm_ghash_4bit_mmx |
| 301 .align 4 |
| 302 _gcm_ghash_4bit_mmx: |
| 303 L_gcm_ghash_4bit_mmx_begin: |
| 304 pushl %ebp |
| 305 pushl %ebx |
| 306 pushl %esi |
| 307 pushl %edi |
| 308 movl 20(%esp),%eax |
| 309 movl 24(%esp),%ebx |
| 310 movl 28(%esp),%ecx |
| 311 movl 32(%esp),%edx |
| 312 movl %esp,%ebp |
| 313 call L008pic_point |
| 314 L008pic_point: |
| 315 popl %esi |
| 316 leal Lrem_8bit-L008pic_point(%esi),%esi |
| 317 subl $544,%esp |
| 318 andl $-64,%esp |
| 319 subl $16,%esp |
| 320 addl %ecx,%edx |
| 321 movl %eax,544(%esp) |
| 322 movl %edx,552(%esp) |
| 323 movl %ebp,556(%esp) |
| 324 addl $128,%ebx |
| 325 leal 144(%esp),%edi |
| 326 leal 400(%esp),%ebp |
| 327 movl -120(%ebx),%edx |
| 328 movq -120(%ebx),%mm0 |
| 329 movq -128(%ebx),%mm3 |
| 330 shll $4,%edx |
| 331 movb %dl,(%esp) |
| 332 movl -104(%ebx),%edx |
| 333 movq -104(%ebx),%mm2 |
| 334 movq -112(%ebx),%mm5 |
| 335 movq %mm0,-128(%edi) |
| 336 psrlq $4,%mm0 |
| 337 movq %mm3,(%edi) |
| 338 movq %mm3,%mm7 |
| 339 psrlq $4,%mm3 |
| 340 shll $4,%edx |
| 341 movb %dl,1(%esp) |
| 342 movl -88(%ebx),%edx |
| 343 movq -88(%ebx),%mm1 |
| 344 psllq $60,%mm7 |
| 345 movq -96(%ebx),%mm4 |
| 346 por %mm7,%mm0 |
| 347 movq %mm2,-120(%edi) |
| 348 psrlq $4,%mm2 |
| 349 movq %mm5,8(%edi) |
| 350 movq %mm5,%mm6 |
| 351 movq %mm0,-128(%ebp) |
| 352 psrlq $4,%mm5 |
| 353 movq %mm3,(%ebp) |
| 354 shll $4,%edx |
| 355 movb %dl,2(%esp) |
| 356 movl -72(%ebx),%edx |
| 357 movq -72(%ebx),%mm0 |
| 358 psllq $60,%mm6 |
| 359 movq -80(%ebx),%mm3 |
| 360 por %mm6,%mm2 |
| 361 movq %mm1,-112(%edi) |
| 362 psrlq $4,%mm1 |
| 363 movq %mm4,16(%edi) |
| 364 movq %mm4,%mm7 |
| 365 movq %mm2,-120(%ebp) |
| 366 psrlq $4,%mm4 |
| 367 movq %mm5,8(%ebp) |
| 368 shll $4,%edx |
| 369 movb %dl,3(%esp) |
| 370 movl -56(%ebx),%edx |
| 371 movq -56(%ebx),%mm2 |
| 372 psllq $60,%mm7 |
| 373 movq -64(%ebx),%mm5 |
| 374 por %mm7,%mm1 |
| 375 movq %mm0,-104(%edi) |
| 376 psrlq $4,%mm0 |
| 377 movq %mm3,24(%edi) |
| 378 movq %mm3,%mm6 |
| 379 movq %mm1,-112(%ebp) |
| 380 psrlq $4,%mm3 |
| 381 movq %mm4,16(%ebp) |
| 382 shll $4,%edx |
| 383 movb %dl,4(%esp) |
| 384 movl -40(%ebx),%edx |
| 385 movq -40(%ebx),%mm1 |
| 386 psllq $60,%mm6 |
| 387 movq -48(%ebx),%mm4 |
| 388 por %mm6,%mm0 |
| 389 movq %mm2,-96(%edi) |
| 390 psrlq $4,%mm2 |
| 391 movq %mm5,32(%edi) |
| 392 movq %mm5,%mm7 |
| 393 movq %mm0,-104(%ebp) |
| 394 psrlq $4,%mm5 |
| 395 movq %mm3,24(%ebp) |
| 396 shll $4,%edx |
| 397 movb %dl,5(%esp) |
| 398 movl -24(%ebx),%edx |
| 399 movq -24(%ebx),%mm0 |
| 400 psllq $60,%mm7 |
| 401 movq -32(%ebx),%mm3 |
| 402 por %mm7,%mm2 |
| 403 movq %mm1,-88(%edi) |
| 404 psrlq $4,%mm1 |
| 405 movq %mm4,40(%edi) |
| 406 movq %mm4,%mm6 |
| 407 movq %mm2,-96(%ebp) |
| 408 psrlq $4,%mm4 |
| 409 movq %mm5,32(%ebp) |
| 410 shll $4,%edx |
| 411 movb %dl,6(%esp) |
| 412 movl -8(%ebx),%edx |
| 413 movq -8(%ebx),%mm2 |
| 414 psllq $60,%mm6 |
| 415 movq -16(%ebx),%mm5 |
| 416 por %mm6,%mm1 |
| 417 movq %mm0,-80(%edi) |
| 418 psrlq $4,%mm0 |
| 419 movq %mm3,48(%edi) |
| 420 movq %mm3,%mm7 |
| 421 movq %mm1,-88(%ebp) |
| 422 psrlq $4,%mm3 |
| 423 movq %mm4,40(%ebp) |
| 424 shll $4,%edx |
| 425 movb %dl,7(%esp) |
| 426 movl 8(%ebx),%edx |
| 427 movq 8(%ebx),%mm1 |
| 428 psllq $60,%mm7 |
| 429 movq (%ebx),%mm4 |
| 430 por %mm7,%mm0 |
| 431 movq %mm2,-72(%edi) |
| 432 psrlq $4,%mm2 |
| 433 movq %mm5,56(%edi) |
| 434 movq %mm5,%mm6 |
| 435 movq %mm0,-80(%ebp) |
| 436 psrlq $4,%mm5 |
| 437 movq %mm3,48(%ebp) |
| 438 shll $4,%edx |
| 439 movb %dl,8(%esp) |
| 440 movl 24(%ebx),%edx |
| 441 movq 24(%ebx),%mm0 |
| 442 psllq $60,%mm6 |
| 443 movq 16(%ebx),%mm3 |
| 444 por %mm6,%mm2 |
| 445 movq %mm1,-64(%edi) |
| 446 psrlq $4,%mm1 |
| 447 movq %mm4,64(%edi) |
| 448 movq %mm4,%mm7 |
| 449 movq %mm2,-72(%ebp) |
| 450 psrlq $4,%mm4 |
| 451 movq %mm5,56(%ebp) |
| 452 shll $4,%edx |
| 453 movb %dl,9(%esp) |
| 454 movl 40(%ebx),%edx |
| 455 movq 40(%ebx),%mm2 |
| 456 psllq $60,%mm7 |
| 457 movq 32(%ebx),%mm5 |
| 458 por %mm7,%mm1 |
| 459 movq %mm0,-56(%edi) |
| 460 psrlq $4,%mm0 |
| 461 movq %mm3,72(%edi) |
| 462 movq %mm3,%mm6 |
| 463 movq %mm1,-64(%ebp) |
| 464 psrlq $4,%mm3 |
| 465 movq %mm4,64(%ebp) |
| 466 shll $4,%edx |
| 467 movb %dl,10(%esp) |
| 468 movl 56(%ebx),%edx |
| 469 movq 56(%ebx),%mm1 |
| 470 psllq $60,%mm6 |
| 471 movq 48(%ebx),%mm4 |
| 472 por %mm6,%mm0 |
| 473 movq %mm2,-48(%edi) |
| 474 psrlq $4,%mm2 |
| 475 movq %mm5,80(%edi) |
| 476 movq %mm5,%mm7 |
| 477 movq %mm0,-56(%ebp) |
| 478 psrlq $4,%mm5 |
| 479 movq %mm3,72(%ebp) |
| 480 shll $4,%edx |
| 481 movb %dl,11(%esp) |
| 482 movl 72(%ebx),%edx |
| 483 movq 72(%ebx),%mm0 |
| 484 psllq $60,%mm7 |
| 485 movq 64(%ebx),%mm3 |
| 486 por %mm7,%mm2 |
| 487 movq %mm1,-40(%edi) |
| 488 psrlq $4,%mm1 |
| 489 movq %mm4,88(%edi) |
| 490 movq %mm4,%mm6 |
| 491 movq %mm2,-48(%ebp) |
| 492 psrlq $4,%mm4 |
| 493 movq %mm5,80(%ebp) |
| 494 shll $4,%edx |
| 495 movb %dl,12(%esp) |
| 496 movl 88(%ebx),%edx |
| 497 movq 88(%ebx),%mm2 |
| 498 psllq $60,%mm6 |
| 499 movq 80(%ebx),%mm5 |
| 500 por %mm6,%mm1 |
| 501 movq %mm0,-32(%edi) |
| 502 psrlq $4,%mm0 |
| 503 movq %mm3,96(%edi) |
| 504 movq %mm3,%mm7 |
| 505 movq %mm1,-40(%ebp) |
| 506 psrlq $4,%mm3 |
| 507 movq %mm4,88(%ebp) |
| 508 shll $4,%edx |
| 509 movb %dl,13(%esp) |
| 510 movl 104(%ebx),%edx |
| 511 movq 104(%ebx),%mm1 |
| 512 psllq $60,%mm7 |
| 513 movq 96(%ebx),%mm4 |
| 514 por %mm7,%mm0 |
| 515 movq %mm2,-24(%edi) |
| 516 psrlq $4,%mm2 |
| 517 movq %mm5,104(%edi) |
| 518 movq %mm5,%mm6 |
| 519 movq %mm0,-32(%ebp) |
| 520 psrlq $4,%mm5 |
| 521 movq %mm3,96(%ebp) |
| 522 shll $4,%edx |
| 523 movb %dl,14(%esp) |
| 524 movl 120(%ebx),%edx |
| 525 movq 120(%ebx),%mm0 |
| 526 psllq $60,%mm6 |
| 527 movq 112(%ebx),%mm3 |
| 528 por %mm6,%mm2 |
| 529 movq %mm1,-16(%edi) |
| 530 psrlq $4,%mm1 |
| 531 movq %mm4,112(%edi) |
| 532 movq %mm4,%mm7 |
| 533 movq %mm2,-24(%ebp) |
| 534 psrlq $4,%mm4 |
| 535 movq %mm5,104(%ebp) |
| 536 shll $4,%edx |
| 537 movb %dl,15(%esp) |
| 538 psllq $60,%mm7 |
| 539 por %mm7,%mm1 |
| 540 movq %mm0,-8(%edi) |
| 541 psrlq $4,%mm0 |
| 542 movq %mm3,120(%edi) |
| 543 movq %mm3,%mm6 |
| 544 movq %mm1,-16(%ebp) |
| 545 psrlq $4,%mm3 |
| 546 movq %mm4,112(%ebp) |
| 547 psllq $60,%mm6 |
| 548 por %mm6,%mm0 |
| 549 movq %mm0,-8(%ebp) |
| 550 movq %mm3,120(%ebp) |
| 551 movq (%eax),%mm6 |
| 552 movl 8(%eax),%ebx |
| 553 movl 12(%eax),%edx |
| 554 .align 4,0x90 |
| 555 L009outer: |
| 556 xorl 12(%ecx),%edx |
| 557 xorl 8(%ecx),%ebx |
| 558 pxor (%ecx),%mm6 |
| 559 leal 16(%ecx),%ecx |
| 560 movl %ebx,536(%esp) |
| 561 movq %mm6,528(%esp) |
| 562 movl %ecx,548(%esp) |
| 563 xorl %eax,%eax |
| 564 roll $8,%edx |
| 565 movb %dl,%al |
| 566 movl %eax,%ebp |
| 567 andb $15,%al |
| 568 shrl $4,%ebp |
| 569 pxor %mm0,%mm0 |
| 570 roll $8,%edx |
| 571 pxor %mm1,%mm1 |
| 572 pxor %mm2,%mm2 |
| 573 movq 16(%esp,%eax,8),%mm7 |
| 574 movq 144(%esp,%eax,8),%mm6 |
| 575 movb %dl,%al |
| 576 movd %mm7,%ebx |
| 577 psrlq $8,%mm7 |
| 578 movq %mm6,%mm3 |
| 579 movl %eax,%edi |
| 580 psrlq $8,%mm6 |
| 581 pxor 272(%esp,%ebp,8),%mm7 |
| 582 andb $15,%al |
| 583 psllq $56,%mm3 |
| 584 shrl $4,%edi |
| 585 pxor 16(%esp,%eax,8),%mm7 |
| 586 roll $8,%edx |
| 587 pxor 144(%esp,%eax,8),%mm6 |
| 588 pxor %mm3,%mm7 |
| 589 pxor 400(%esp,%ebp,8),%mm6 |
| 590 xorb (%esp,%ebp,1),%bl |
| 591 movb %dl,%al |
| 592 movd %mm7,%ecx |
| 593 movzbl %bl,%ebx |
| 594 psrlq $8,%mm7 |
| 595 movq %mm6,%mm3 |
| 596 movl %eax,%ebp |
| 597 psrlq $8,%mm6 |
| 598 pxor 272(%esp,%edi,8),%mm7 |
| 599 andb $15,%al |
| 600 psllq $56,%mm3 |
| 601 shrl $4,%ebp |
| 602 pinsrw $2,(%esi,%ebx,2),%mm2 |
| 603 pxor 16(%esp,%eax,8),%mm7 |
| 604 roll $8,%edx |
| 605 pxor 144(%esp,%eax,8),%mm6 |
| 606 pxor %mm3,%mm7 |
| 607 pxor 400(%esp,%edi,8),%mm6 |
| 608 xorb (%esp,%edi,1),%cl |
| 609 movb %dl,%al |
| 610 movl 536(%esp),%edx |
| 611 movd %mm7,%ebx |
| 612 movzbl %cl,%ecx |
| 613 psrlq $8,%mm7 |
| 614 movq %mm6,%mm3 |
| 615 movl %eax,%edi |
| 616 psrlq $8,%mm6 |
| 617 pxor 272(%esp,%ebp,8),%mm7 |
| 618 andb $15,%al |
| 619 psllq $56,%mm3 |
| 620 pxor %mm2,%mm6 |
| 621 shrl $4,%edi |
| 622 pinsrw $2,(%esi,%ecx,2),%mm1 |
| 623 pxor 16(%esp,%eax,8),%mm7 |
| 624 roll $8,%edx |
| 625 pxor 144(%esp,%eax,8),%mm6 |
| 626 pxor %mm3,%mm7 |
| 627 pxor 400(%esp,%ebp,8),%mm6 |
| 628 xorb (%esp,%ebp,1),%bl |
| 629 movb %dl,%al |
| 630 movd %mm7,%ecx |
| 631 movzbl %bl,%ebx |
| 632 psrlq $8,%mm7 |
| 633 movq %mm6,%mm3 |
| 634 movl %eax,%ebp |
| 635 psrlq $8,%mm6 |
| 636 pxor 272(%esp,%edi,8),%mm7 |
| 637 andb $15,%al |
| 638 psllq $56,%mm3 |
| 639 pxor %mm1,%mm6 |
| 640 shrl $4,%ebp |
| 641 pinsrw $2,(%esi,%ebx,2),%mm0 |
| 642 pxor 16(%esp,%eax,8),%mm7 |
| 643 roll $8,%edx |
| 644 pxor 144(%esp,%eax,8),%mm6 |
| 645 pxor %mm3,%mm7 |
| 646 pxor 400(%esp,%edi,8),%mm6 |
| 647 xorb (%esp,%edi,1),%cl |
| 648 movb %dl,%al |
| 649 movd %mm7,%ebx |
| 650 movzbl %cl,%ecx |
| 651 psrlq $8,%mm7 |
| 652 movq %mm6,%mm3 |
| 653 movl %eax,%edi |
| 654 psrlq $8,%mm6 |
| 655 pxor 272(%esp,%ebp,8),%mm7 |
| 656 andb $15,%al |
| 657 psllq $56,%mm3 |
| 658 pxor %mm0,%mm6 |
| 659 shrl $4,%edi |
| 660 pinsrw $2,(%esi,%ecx,2),%mm2 |
| 661 pxor 16(%esp,%eax,8),%mm7 |
| 662 roll $8,%edx |
| 663 pxor 144(%esp,%eax,8),%mm6 |
| 664 pxor %mm3,%mm7 |
| 665 pxor 400(%esp,%ebp,8),%mm6 |
| 666 xorb (%esp,%ebp,1),%bl |
| 667 movb %dl,%al |
| 668 movd %mm7,%ecx |
| 669 movzbl %bl,%ebx |
| 670 psrlq $8,%mm7 |
| 671 movq %mm6,%mm3 |
| 672 movl %eax,%ebp |
| 673 psrlq $8,%mm6 |
| 674 pxor 272(%esp,%edi,8),%mm7 |
| 675 andb $15,%al |
| 676 psllq $56,%mm3 |
| 677 pxor %mm2,%mm6 |
| 678 shrl $4,%ebp |
| 679 pinsrw $2,(%esi,%ebx,2),%mm1 |
| 680 pxor 16(%esp,%eax,8),%mm7 |
| 681 roll $8,%edx |
| 682 pxor 144(%esp,%eax,8),%mm6 |
| 683 pxor %mm3,%mm7 |
| 684 pxor 400(%esp,%edi,8),%mm6 |
| 685 xorb (%esp,%edi,1),%cl |
| 686 movb %dl,%al |
| 687 movl 532(%esp),%edx |
| 688 movd %mm7,%ebx |
| 689 movzbl %cl,%ecx |
| 690 psrlq $8,%mm7 |
| 691 movq %mm6,%mm3 |
| 692 movl %eax,%edi |
| 693 psrlq $8,%mm6 |
| 694 pxor 272(%esp,%ebp,8),%mm7 |
| 695 andb $15,%al |
| 696 psllq $56,%mm3 |
| 697 pxor %mm1,%mm6 |
| 698 shrl $4,%edi |
| 699 pinsrw $2,(%esi,%ecx,2),%mm0 |
| 700 pxor 16(%esp,%eax,8),%mm7 |
| 701 roll $8,%edx |
| 702 pxor 144(%esp,%eax,8),%mm6 |
| 703 pxor %mm3,%mm7 |
| 704 pxor 400(%esp,%ebp,8),%mm6 |
| 705 xorb (%esp,%ebp,1),%bl |
| 706 movb %dl,%al |
| 707 movd %mm7,%ecx |
| 708 movzbl %bl,%ebx |
| 709 psrlq $8,%mm7 |
| 710 movq %mm6,%mm3 |
| 711 movl %eax,%ebp |
| 712 psrlq $8,%mm6 |
| 713 pxor 272(%esp,%edi,8),%mm7 |
| 714 andb $15,%al |
| 715 psllq $56,%mm3 |
| 716 pxor %mm0,%mm6 |
| 717 shrl $4,%ebp |
| 718 pinsrw $2,(%esi,%ebx,2),%mm2 |
| 719 pxor 16(%esp,%eax,8),%mm7 |
| 720 roll $8,%edx |
| 721 pxor 144(%esp,%eax,8),%mm6 |
| 722 pxor %mm3,%mm7 |
| 723 pxor 400(%esp,%edi,8),%mm6 |
| 724 xorb (%esp,%edi,1),%cl |
| 725 movb %dl,%al |
| 726 movd %mm7,%ebx |
| 727 movzbl %cl,%ecx |
| 728 psrlq $8,%mm7 |
| 729 movq %mm6,%mm3 |
| 730 movl %eax,%edi |
| 731 psrlq $8,%mm6 |
| 732 pxor 272(%esp,%ebp,8),%mm7 |
| 733 andb $15,%al |
| 734 psllq $56,%mm3 |
| 735 pxor %mm2,%mm6 |
| 736 shrl $4,%edi |
| 737 pinsrw $2,(%esi,%ecx,2),%mm1 |
| 738 pxor 16(%esp,%eax,8),%mm7 |
| 739 roll $8,%edx |
| 740 pxor 144(%esp,%eax,8),%mm6 |
| 741 pxor %mm3,%mm7 |
| 742 pxor 400(%esp,%ebp,8),%mm6 |
| 743 xorb (%esp,%ebp,1),%bl |
| 744 movb %dl,%al |
| 745 movd %mm7,%ecx |
| 746 movzbl %bl,%ebx |
| 747 psrlq $8,%mm7 |
| 748 movq %mm6,%mm3 |
| 749 movl %eax,%ebp |
| 750 psrlq $8,%mm6 |
| 751 pxor 272(%esp,%edi,8),%mm7 |
| 752 andb $15,%al |
| 753 psllq $56,%mm3 |
| 754 pxor %mm1,%mm6 |
| 755 shrl $4,%ebp |
| 756 pinsrw $2,(%esi,%ebx,2),%mm0 |
| 757 pxor 16(%esp,%eax,8),%mm7 |
| 758 roll $8,%edx |
| 759 pxor 144(%esp,%eax,8),%mm6 |
| 760 pxor %mm3,%mm7 |
| 761 pxor 400(%esp,%edi,8),%mm6 |
| 762 xorb (%esp,%edi,1),%cl |
| 763 movb %dl,%al |
| 764 movl 528(%esp),%edx |
| 765 movd %mm7,%ebx |
| 766 movzbl %cl,%ecx |
| 767 psrlq $8,%mm7 |
| 768 movq %mm6,%mm3 |
| 769 movl %eax,%edi |
| 770 psrlq $8,%mm6 |
| 771 pxor 272(%esp,%ebp,8),%mm7 |
| 772 andb $15,%al |
| 773 psllq $56,%mm3 |
| 774 pxor %mm0,%mm6 |
| 775 shrl $4,%edi |
| 776 pinsrw $2,(%esi,%ecx,2),%mm2 |
| 777 pxor 16(%esp,%eax,8),%mm7 |
| 778 roll $8,%edx |
| 779 pxor 144(%esp,%eax,8),%mm6 |
| 780 pxor %mm3,%mm7 |
| 781 pxor 400(%esp,%ebp,8),%mm6 |
| 782 xorb (%esp,%ebp,1),%bl |
| 783 movb %dl,%al |
| 784 movd %mm7,%ecx |
| 785 movzbl %bl,%ebx |
| 786 psrlq $8,%mm7 |
| 787 movq %mm6,%mm3 |
| 788 movl %eax,%ebp |
| 789 psrlq $8,%mm6 |
| 790 pxor 272(%esp,%edi,8),%mm7 |
| 791 andb $15,%al |
| 792 psllq $56,%mm3 |
| 793 pxor %mm2,%mm6 |
| 794 shrl $4,%ebp |
| 795 pinsrw $2,(%esi,%ebx,2),%mm1 |
| 796 pxor 16(%esp,%eax,8),%mm7 |
| 797 roll $8,%edx |
| 798 pxor 144(%esp,%eax,8),%mm6 |
| 799 pxor %mm3,%mm7 |
| 800 pxor 400(%esp,%edi,8),%mm6 |
| 801 xorb (%esp,%edi,1),%cl |
| 802 movb %dl,%al |
| 803 movd %mm7,%ebx |
| 804 movzbl %cl,%ecx |
| 805 psrlq $8,%mm7 |
| 806 movq %mm6,%mm3 |
| 807 movl %eax,%edi |
| 808 psrlq $8,%mm6 |
| 809 pxor 272(%esp,%ebp,8),%mm7 |
| 810 andb $15,%al |
| 811 psllq $56,%mm3 |
| 812 pxor %mm1,%mm6 |
| 813 shrl $4,%edi |
| 814 pinsrw $2,(%esi,%ecx,2),%mm0 |
| 815 pxor 16(%esp,%eax,8),%mm7 |
| 816 roll $8,%edx |
| 817 pxor 144(%esp,%eax,8),%mm6 |
| 818 pxor %mm3,%mm7 |
| 819 pxor 400(%esp,%ebp,8),%mm6 |
| 820 xorb (%esp,%ebp,1),%bl |
| 821 movb %dl,%al |
| 822 movd %mm7,%ecx |
| 823 movzbl %bl,%ebx |
| 824 psrlq $8,%mm7 |
| 825 movq %mm6,%mm3 |
| 826 movl %eax,%ebp |
| 827 psrlq $8,%mm6 |
| 828 pxor 272(%esp,%edi,8),%mm7 |
| 829 andb $15,%al |
| 830 psllq $56,%mm3 |
| 831 pxor %mm0,%mm6 |
| 832 shrl $4,%ebp |
| 833 pinsrw $2,(%esi,%ebx,2),%mm2 |
| 834 pxor 16(%esp,%eax,8),%mm7 |
| 835 roll $8,%edx |
| 836 pxor 144(%esp,%eax,8),%mm6 |
| 837 pxor %mm3,%mm7 |
| 838 pxor 400(%esp,%edi,8),%mm6 |
| 839 xorb (%esp,%edi,1),%cl |
| 840 movb %dl,%al |
| 841 movl 524(%esp),%edx |
| 842 movd %mm7,%ebx |
| 843 movzbl %cl,%ecx |
| 844 psrlq $8,%mm7 |
| 845 movq %mm6,%mm3 |
| 846 movl %eax,%edi |
| 847 psrlq $8,%mm6 |
| 848 pxor 272(%esp,%ebp,8),%mm7 |
| 849 andb $15,%al |
| 850 psllq $56,%mm3 |
| 851 pxor %mm2,%mm6 |
| 852 shrl $4,%edi |
| 853 pinsrw $2,(%esi,%ecx,2),%mm1 |
| 854 pxor 16(%esp,%eax,8),%mm7 |
| 855 pxor 144(%esp,%eax,8),%mm6 |
| 856 xorb (%esp,%ebp,1),%bl |
| 857 pxor %mm3,%mm7 |
| 858 pxor 400(%esp,%ebp,8),%mm6 |
| 859 movzbl %bl,%ebx |
| 860 pxor %mm2,%mm2 |
| 861 psllq $4,%mm1 |
| 862 movd %mm7,%ecx |
| 863 psrlq $4,%mm7 |
| 864 movq %mm6,%mm3 |
| 865 psrlq $4,%mm6 |
| 866 shll $4,%ecx |
| 867 pxor 16(%esp,%edi,8),%mm7 |
| 868 psllq $60,%mm3 |
| 869 movzbl %cl,%ecx |
| 870 pxor %mm3,%mm7 |
| 871 pxor 144(%esp,%edi,8),%mm6 |
| 872 pinsrw $2,(%esi,%ebx,2),%mm0 |
| 873 pxor %mm1,%mm6 |
| 874 movd %mm7,%edx |
| 875 pinsrw $3,(%esi,%ecx,2),%mm2 |
| 876 psllq $12,%mm0 |
| 877 pxor %mm0,%mm6 |
| 878 psrlq $32,%mm7 |
| 879 pxor %mm2,%mm6 |
| 880 movl 548(%esp),%ecx |
| 881 movd %mm7,%ebx |
| 882 movq %mm6,%mm3 |
| 883 psllw $8,%mm6 |
| 884 psrlw $8,%mm3 |
| 885 por %mm3,%mm6 |
| 886 bswap %edx |
| 887 pshufw $27,%mm6,%mm6 |
| 888 bswap %ebx |
| 889 cmpl 552(%esp),%ecx |
| 890 jne L009outer |
| 891 movl 544(%esp),%eax |
| 892 movl %edx,12(%eax) |
| 893 movl %ebx,8(%eax) |
| 894 movq %mm6,(%eax) |
| 895 movl 556(%esp),%esp |
| 896 emms |
| 897 popl %edi |
| 898 popl %esi |
| 899 popl %ebx |
| 900 popl %ebp |
| 901 ret |
| 902 .globl _gcm_init_clmul |
| 903 .align 4 |
| 904 _gcm_init_clmul: |
| 905 L_gcm_init_clmul_begin: |
| 906 movl 4(%esp),%edx |
| 907 movl 8(%esp),%eax |
| 908 call L010pic |
| 909 L010pic: |
| 910 popl %ecx |
| 911 leal Lbswap-L010pic(%ecx),%ecx |
| 912 movdqu (%eax),%xmm2 |
| 913 pshufd $78,%xmm2,%xmm2 |
| 914 pshufd $255,%xmm2,%xmm4 |
| 915 movdqa %xmm2,%xmm3 |
| 916 psllq $1,%xmm2 |
| 917 pxor %xmm5,%xmm5 |
| 918 psrlq $63,%xmm3 |
| 919 pcmpgtd %xmm4,%xmm5 |
| 920 pslldq $8,%xmm3 |
| 921 por %xmm3,%xmm2 |
| 922 pand 16(%ecx),%xmm5 |
| 923 pxor %xmm5,%xmm2 |
| 924 movdqa %xmm2,%xmm0 |
| 925 movdqa %xmm0,%xmm1 |
| 926 pshufd $78,%xmm0,%xmm3 |
| 927 pshufd $78,%xmm2,%xmm4 |
| 928 pxor %xmm0,%xmm3 |
| 929 pxor %xmm2,%xmm4 |
| 930 .byte 102,15,58,68,194,0 |
| 931 .byte 102,15,58,68,202,17 |
| 932 .byte 102,15,58,68,220,0 |
| 933 xorps %xmm0,%xmm3 |
| 934 xorps %xmm1,%xmm3 |
| 935 movdqa %xmm3,%xmm4 |
| 936 psrldq $8,%xmm3 |
| 937 pslldq $8,%xmm4 |
| 938 pxor %xmm3,%xmm1 |
| 939 pxor %xmm4,%xmm0 |
| 940 movdqa %xmm0,%xmm4 |
| 941 movdqa %xmm0,%xmm3 |
| 942 psllq $5,%xmm0 |
| 943 pxor %xmm0,%xmm3 |
| 944 psllq $1,%xmm0 |
| 945 pxor %xmm3,%xmm0 |
| 946 psllq $57,%xmm0 |
| 947 movdqa %xmm0,%xmm3 |
| 948 pslldq $8,%xmm0 |
| 949 psrldq $8,%xmm3 |
| 950 pxor %xmm4,%xmm0 |
| 951 pxor %xmm3,%xmm1 |
| 952 movdqa %xmm0,%xmm4 |
| 953 psrlq $1,%xmm0 |
| 954 pxor %xmm4,%xmm1 |
| 955 pxor %xmm0,%xmm4 |
| 956 psrlq $5,%xmm0 |
| 957 pxor %xmm4,%xmm0 |
| 958 psrlq $1,%xmm0 |
| 959 pxor %xmm1,%xmm0 |
| 960 pshufd $78,%xmm2,%xmm3 |
| 961 pshufd $78,%xmm0,%xmm4 |
| 962 pxor %xmm2,%xmm3 |
| 963 movdqu %xmm2,(%edx) |
| 964 pxor %xmm0,%xmm4 |
| 965 movdqu %xmm0,16(%edx) |
| 966 .byte 102,15,58,15,227,8 |
| 967 movdqu %xmm4,32(%edx) |
| 968 ret |
| 969 .globl _gcm_gmult_clmul |
| 970 .align 4 |
| 971 _gcm_gmult_clmul: |
| 972 L_gcm_gmult_clmul_begin: |
| 973 movl 4(%esp),%eax |
| 974 movl 8(%esp),%edx |
| 975 call L011pic |
| 976 L011pic: |
| 977 popl %ecx |
| 978 leal Lbswap-L011pic(%ecx),%ecx |
| 979 movdqu (%eax),%xmm0 |
| 980 movdqa (%ecx),%xmm5 |
| 981 movups (%edx),%xmm2 |
| 982 .byte 102,15,56,0,197 |
| 983 movups 32(%edx),%xmm4 |
| 984 movdqa %xmm0,%xmm1 |
| 985 pshufd $78,%xmm0,%xmm3 |
| 986 pxor %xmm0,%xmm3 |
| 987 .byte 102,15,58,68,194,0 |
| 988 .byte 102,15,58,68,202,17 |
| 989 .byte 102,15,58,68,220,0 |
| 990 xorps %xmm0,%xmm3 |
| 991 xorps %xmm1,%xmm3 |
| 992 movdqa %xmm3,%xmm4 |
| 993 psrldq $8,%xmm3 |
| 994 pslldq $8,%xmm4 |
| 995 pxor %xmm3,%xmm1 |
| 996 pxor %xmm4,%xmm0 |
| 997 movdqa %xmm0,%xmm4 |
| 998 movdqa %xmm0,%xmm3 |
| 999 psllq $5,%xmm0 |
| 1000 pxor %xmm0,%xmm3 |
| 1001 psllq $1,%xmm0 |
| 1002 pxor %xmm3,%xmm0 |
| 1003 psllq $57,%xmm0 |
| 1004 movdqa %xmm0,%xmm3 |
| 1005 pslldq $8,%xmm0 |
| 1006 psrldq $8,%xmm3 |
| 1007 pxor %xmm4,%xmm0 |
| 1008 pxor %xmm3,%xmm1 |
| 1009 movdqa %xmm0,%xmm4 |
| 1010 psrlq $1,%xmm0 |
| 1011 pxor %xmm4,%xmm1 |
| 1012 pxor %xmm0,%xmm4 |
| 1013 psrlq $5,%xmm0 |
| 1014 pxor %xmm4,%xmm0 |
| 1015 psrlq $1,%xmm0 |
| 1016 pxor %xmm1,%xmm0 |
| 1017 .byte 102,15,56,0,197 |
| 1018 movdqu %xmm0,(%eax) |
| 1019 ret |
| 1020 .globl _gcm_ghash_clmul |
| 1021 .align 4 |
| 1022 _gcm_ghash_clmul: |
| 1023 L_gcm_ghash_clmul_begin: |
| 1024 pushl %ebp |
| 1025 pushl %ebx |
| 1026 pushl %esi |
| 1027 pushl %edi |
| 1028 movl 20(%esp),%eax |
| 1029 movl 24(%esp),%edx |
| 1030 movl 28(%esp),%esi |
| 1031 movl 32(%esp),%ebx |
| 1032 call L012pic |
| 1033 L012pic: |
| 1034 popl %ecx |
| 1035 leal Lbswap-L012pic(%ecx),%ecx |
| 1036 movdqu (%eax),%xmm0 |
| 1037 movdqa (%ecx),%xmm5 |
| 1038 movdqu (%edx),%xmm2 |
| 1039 .byte 102,15,56,0,197 |
| 1040 subl $16,%ebx |
| 1041 jz L013odd_tail |
| 1042 movdqu (%esi),%xmm3 |
| 1043 movdqu 16(%esi),%xmm6 |
| 1044 .byte 102,15,56,0,221 |
| 1045 .byte 102,15,56,0,245 |
| 1046 movdqu 32(%edx),%xmm5 |
| 1047 pxor %xmm3,%xmm0 |
| 1048 pshufd $78,%xmm6,%xmm3 |
| 1049 movdqa %xmm6,%xmm7 |
| 1050 pxor %xmm6,%xmm3 |
| 1051 leal 32(%esi),%esi |
| 1052 .byte 102,15,58,68,242,0 |
| 1053 .byte 102,15,58,68,250,17 |
| 1054 .byte 102,15,58,68,221,0 |
| 1055 movups 16(%edx),%xmm2 |
| 1056 nop |
| 1057 subl $32,%ebx |
| 1058 jbe L014even_tail |
| 1059 jmp L015mod_loop |
| 1060 .align 5,0x90 |
| 1061 L015mod_loop: |
| 1062 pshufd $78,%xmm0,%xmm4 |
| 1063 movdqa %xmm0,%xmm1 |
| 1064 pxor %xmm0,%xmm4 |
| 1065 nop |
| 1066 .byte 102,15,58,68,194,0 |
| 1067 .byte 102,15,58,68,202,17 |
| 1068 .byte 102,15,58,68,229,16 |
| 1069 movups (%edx),%xmm2 |
| 1070 xorps %xmm6,%xmm0 |
| 1071 movdqa (%ecx),%xmm5 |
| 1072 xorps %xmm7,%xmm1 |
| 1073 movdqu (%esi),%xmm7 |
| 1074 pxor %xmm0,%xmm3 |
| 1075 movdqu 16(%esi),%xmm6 |
| 1076 pxor %xmm1,%xmm3 |
| 1077 .byte 102,15,56,0,253 |
| 1078 pxor %xmm3,%xmm4 |
| 1079 movdqa %xmm4,%xmm3 |
| 1080 psrldq $8,%xmm4 |
| 1081 pslldq $8,%xmm3 |
| 1082 pxor %xmm4,%xmm1 |
| 1083 pxor %xmm3,%xmm0 |
| 1084 .byte 102,15,56,0,245 |
| 1085 pxor %xmm7,%xmm1 |
| 1086 movdqa %xmm6,%xmm7 |
| 1087 movdqa %xmm0,%xmm4 |
| 1088 movdqa %xmm0,%xmm3 |
| 1089 psllq $5,%xmm0 |
| 1090 pxor %xmm0,%xmm3 |
| 1091 psllq $1,%xmm0 |
| 1092 pxor %xmm3,%xmm0 |
| 1093 .byte 102,15,58,68,242,0 |
| 1094 movups 32(%edx),%xmm5 |
| 1095 psllq $57,%xmm0 |
| 1096 movdqa %xmm0,%xmm3 |
| 1097 pslldq $8,%xmm0 |
| 1098 psrldq $8,%xmm3 |
| 1099 pxor %xmm4,%xmm0 |
| 1100 pxor %xmm3,%xmm1 |
| 1101 pshufd $78,%xmm7,%xmm3 |
| 1102 movdqa %xmm0,%xmm4 |
| 1103 psrlq $1,%xmm0 |
| 1104 pxor %xmm7,%xmm3 |
| 1105 pxor %xmm4,%xmm1 |
| 1106 .byte 102,15,58,68,250,17 |
| 1107 movups 16(%edx),%xmm2 |
| 1108 pxor %xmm0,%xmm4 |
| 1109 psrlq $5,%xmm0 |
| 1110 pxor %xmm4,%xmm0 |
| 1111 psrlq $1,%xmm0 |
| 1112 pxor %xmm1,%xmm0 |
| 1113 .byte 102,15,58,68,221,0 |
| 1114 leal 32(%esi),%esi |
| 1115 subl $32,%ebx |
| 1116 ja L015mod_loop |
| 1117 L014even_tail: |
| 1118 pshufd $78,%xmm0,%xmm4 |
| 1119 movdqa %xmm0,%xmm1 |
| 1120 pxor %xmm0,%xmm4 |
| 1121 .byte 102,15,58,68,194,0 |
| 1122 .byte 102,15,58,68,202,17 |
| 1123 .byte 102,15,58,68,229,16 |
| 1124 movdqa (%ecx),%xmm5 |
| 1125 xorps %xmm6,%xmm0 |
| 1126 xorps %xmm7,%xmm1 |
| 1127 pxor %xmm0,%xmm3 |
| 1128 pxor %xmm1,%xmm3 |
| 1129 pxor %xmm3,%xmm4 |
| 1130 movdqa %xmm4,%xmm3 |
| 1131 psrldq $8,%xmm4 |
| 1132 pslldq $8,%xmm3 |
| 1133 pxor %xmm4,%xmm1 |
| 1134 pxor %xmm3,%xmm0 |
| 1135 movdqa %xmm0,%xmm4 |
| 1136 movdqa %xmm0,%xmm3 |
| 1137 psllq $5,%xmm0 |
| 1138 pxor %xmm0,%xmm3 |
| 1139 psllq $1,%xmm0 |
| 1140 pxor %xmm3,%xmm0 |
| 1141 psllq $57,%xmm0 |
| 1142 movdqa %xmm0,%xmm3 |
| 1143 pslldq $8,%xmm0 |
| 1144 psrldq $8,%xmm3 |
| 1145 pxor %xmm4,%xmm0 |
| 1146 pxor %xmm3,%xmm1 |
| 1147 movdqa %xmm0,%xmm4 |
| 1148 psrlq $1,%xmm0 |
| 1149 pxor %xmm4,%xmm1 |
| 1150 pxor %xmm0,%xmm4 |
| 1151 psrlq $5,%xmm0 |
| 1152 pxor %xmm4,%xmm0 |
| 1153 psrlq $1,%xmm0 |
| 1154 pxor %xmm1,%xmm0 |
| 1155 testl %ebx,%ebx |
| 1156 jnz L016done |
| 1157 movups (%edx),%xmm2 |
| 1158 L013odd_tail: |
| 1159 movdqu (%esi),%xmm3 |
| 1160 .byte 102,15,56,0,221 |
| 1161 pxor %xmm3,%xmm0 |
| 1162 movdqa %xmm0,%xmm1 |
| 1163 pshufd $78,%xmm0,%xmm3 |
| 1164 pshufd $78,%xmm2,%xmm4 |
| 1165 pxor %xmm0,%xmm3 |
| 1166 pxor %xmm2,%xmm4 |
| 1167 .byte 102,15,58,68,194,0 |
| 1168 .byte 102,15,58,68,202,17 |
| 1169 .byte 102,15,58,68,220,0 |
| 1170 xorps %xmm0,%xmm3 |
| 1171 xorps %xmm1,%xmm3 |
| 1172 movdqa %xmm3,%xmm4 |
| 1173 psrldq $8,%xmm3 |
| 1174 pslldq $8,%xmm4 |
| 1175 pxor %xmm3,%xmm1 |
| 1176 pxor %xmm4,%xmm0 |
| 1177 movdqa %xmm0,%xmm4 |
| 1178 movdqa %xmm0,%xmm3 |
| 1179 psllq $5,%xmm0 |
| 1180 pxor %xmm0,%xmm3 |
| 1181 psllq $1,%xmm0 |
| 1182 pxor %xmm3,%xmm0 |
| 1183 psllq $57,%xmm0 |
| 1184 movdqa %xmm0,%xmm3 |
| 1185 pslldq $8,%xmm0 |
| 1186 psrldq $8,%xmm3 |
| 1187 pxor %xmm4,%xmm0 |
| 1188 pxor %xmm3,%xmm1 |
| 1189 movdqa %xmm0,%xmm4 |
| 1190 psrlq $1,%xmm0 |
| 1191 pxor %xmm4,%xmm1 |
| 1192 pxor %xmm0,%xmm4 |
| 1193 psrlq $5,%xmm0 |
| 1194 pxor %xmm4,%xmm0 |
| 1195 psrlq $1,%xmm0 |
| 1196 pxor %xmm1,%xmm0 |
| 1197 L016done: |
| 1198 .byte 102,15,56,0,197 |
| 1199 movdqu %xmm0,(%eax) |
| 1200 popl %edi |
| 1201 popl %esi |
| 1202 popl %ebx |
| 1203 popl %ebp |
| 1204 ret |
| 1205 .align 6,0x90 |
| 1206 Lbswap: |
| 1207 .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 |
| 1208 .byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194 |
| 1209 .align 6,0x90 |
| 1210 Lrem_8bit: |
| 1211 .value 0,450,900,582,1800,1738,1164,1358 |
| 1212 .value 3600,4050,3476,3158,2328,2266,2716,2910 |
| 1213 .value 7200,7650,8100,7782,6952,6890,6316,6510 |
| 1214 .value 4656,5106,4532,4214,5432,5370,5820,6014 |
| 1215 .value 14400,14722,15300,14854,16200,16010,15564,15630 |
| 1216 .value 13904,14226,13780,13334,12632,12442,13020,13086 |
| 1217 .value 9312,9634,10212,9766,9064,8874,8428,8494 |
| 1218 .value 10864,11186,10740,10294,11640,11450,12028,12094 |
| 1219 .value 28800,28994,29444,29382,30600,30282,29708,30158 |
| 1220 .value 32400,32594,32020,31958,31128,30810,31260,31710 |
| 1221 .value 27808,28002,28452,28390,27560,27242,26668,27118 |
| 1222 .value 25264,25458,24884,24822,26040,25722,26172,26622 |
| 1223 .value 18624,18690,19268,19078,20424,19978,19532,19854 |
| 1224 .value 18128,18194,17748,17558,16856,16410,16988,17310 |
| 1225 .value 21728,21794,22372,22182,21480,21034,20588,20910 |
| 1226 .value 23280,23346,22900,22710,24056,23610,24188,24510 |
| 1227 .value 57600,57538,57988,58182,58888,59338,58764,58446 |
| 1228 .value 61200,61138,60564,60758,59416,59866,60316,59998 |
| 1229 .value 64800,64738,65188,65382,64040,64490,63916,63598 |
| 1230 .value 62256,62194,61620,61814,62520,62970,63420,63102 |
| 1231 .value 55616,55426,56004,56070,56904,57226,56780,56334 |
| 1232 .value 55120,54930,54484,54550,53336,53658,54236,53790 |
| 1233 .value 50528,50338,50916,50982,49768,50090,49644,49198 |
| 1234 .value 52080,51890,51444,51510,52344,52666,53244,52798 |
| 1235 .value 37248,36930,37380,37830,38536,38730,38156,38094 |
| 1236 .value 40848,40530,39956,40406,39064,39258,39708,39646 |
| 1237 .value 36256,35938,36388,36838,35496,35690,35116,35054 |
| 1238 .value 33712,33394,32820,33270,33976,34170,34620,34558 |
| 1239 .value 43456,43010,43588,43910,44744,44810,44364,44174 |
| 1240 .value 42960,42514,42068,42390,41176,41242,41820,41630 |
| 1241 .value 46560,46114,46692,47014,45800,45866,45420,45230 |
| 1242 .value 48112,47666,47220,47542,48376,48442,49020,48830 |
| 1243 .align 6,0x90 |
| 1244 Lrem_4bit: |
| 1245 .long 0,0,0,471859200,0,943718400,0,610271232 |
| 1246 .long 0,1887436800,0,1822425088,0,1220542464,0,1423966208 |
| 1247 .long 0,3774873600,0,4246732800,0,3644850176,0,3311403008 |
| 1248 .long 0,2441084928,0,2376073216,0,2847932416,0,3051356160 |
| 1249 .byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67 |
| 1250 .byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112 |
| 1251 .byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62 |
| 1252 .byte 0 |
| 1253 #endif |
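
For reference, below is a minimal sketch of the C-side declarations that the symbols exported by this file are normally paired with. The names follow the usual OpenSSL-style GHASH interface (the leading underscore in the assembly labels is the Mach-O mangling of these C names); the exact types, the `u128` layout, and the header itself are assumptions for illustration, not part of this diff.

    /* Hypothetical companion declarations (assumed calling convention).
     * Xi     : the 16-byte GHASH accumulator,
     * Htable : the 16-entry table of multiples of the hash key H,
     * inp/len: input to hash; len is expected to be a multiple of 16. */
    #include <stdint.h>
    #include <stddef.h>

    typedef struct { uint64_t hi, lo; } u128;  /* assumed Htable element layout */

    void gcm_gmult_4bit_x86(uint64_t Xi[2], const u128 Htable[16]);
    void gcm_ghash_4bit_x86(uint64_t Xi[2], const u128 Htable[16],
                            const uint8_t *inp, size_t len);
    void gcm_gmult_4bit_mmx(uint64_t Xi[2], const u128 Htable[16]);
    void gcm_ghash_4bit_mmx(uint64_t Xi[2], const u128 Htable[16],
                            const uint8_t *inp, size_t len);
    void gcm_init_clmul(u128 Htable[16], const uint64_t Xi[2]);
    void gcm_gmult_clmul(uint64_t Xi[2], const u128 Htable[16]);
    void gcm_ghash_clmul(uint64_t Xi[2], const u128 Htable[16],
                         const uint8_t *inp, size_t len);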