#if defined(__i386__)
.file "ghash-x86.S"
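/*
 * GHASH for x86: GF(2^128) multiplication by the GCM hash key H.
 * Judging by the embedded "CRYPTOGAMS by <appro@openssl.org>" string
 * at the end of the file, this is perlasm-generated code (apparently
 * OpenSSL's ghash-x86.pl). It carries three implementations: a plain
 * 4-bit-table x86 version, an MMX version, and a PCLMULQDQ version.
 */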
.text
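/*
 * gcm_gmult_4bit_x86(Xi, Htable) -- by the usual OpenSSL convention,
 * Xi is the 16-byte hash value and Htable the 16-entry 4-bit Shoup
 * table of multiples of H. Computes Xi = Xi * H, one nibble at a time.
 */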
.globl gcm_gmult_4bit_x86
.type gcm_gmult_4bit_x86,@function
.align 16
gcm_gmult_4bit_x86:
.L_gcm_gmult_4bit_x86_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
subl $84,%esp
movl 104(%esp),%edi
movl 108(%esp),%esi
movl (%edi),%ebp
movl 4(%edi),%edx
movl 8(%edi),%ecx
movl 12(%edi),%ebx
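/*
 * Build the rem_4bit reduction table at 16(%esp): entry i is the
 * 16-bit remainder constant shifted into the top half of a word
 * (e.g. 471859200 = 0x1C200000 = 0x1C20 << 16); it folds the four
 * bits shifted off the low end of the product back in. A byte-wise
 * accessible copy of Xi is then kept at (%esp).
 */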
movl $0,16(%esp)
movl $471859200,20(%esp)
movl $943718400,24(%esp)
movl $610271232,28(%esp)
movl $1887436800,32(%esp)
movl $1822425088,36(%esp)
movl $1220542464,40(%esp)
movl $1423966208,44(%esp)
movl $3774873600,48(%esp)
movl $4246732800,52(%esp)
movl $3644850176,56(%esp)
movl $3311403008,60(%esp)
movl $2441084928,64(%esp)
movl $2376073216,68(%esp)
movl $2847932416,72(%esp)
movl $3051356160,76(%esp)
movl %ebp,(%esp)
movl %edx,4(%esp)
movl %ecx,8(%esp)
movl %ebx,12(%esp)
shrl $20,%ebx
andl $240,%ebx
movl 4(%esi,%ebx,1),%ebp
movl (%esi,%ebx,1),%edx
movl 12(%esi,%ebx,1),%ecx
movl 8(%esi,%ebx,1),%ebx
xorl %eax,%eax
movl $15,%edi
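/*
 * Nibble loop: the 128-bit accumulator lives in %ebx (lowest) through
 * %ecx, %edx, %ebp (highest). Each pass shifts it right 4 bits with
 * shrdl, folds the shifted-out nibble via the rem_4bit copy at
 * 16(%esp), and XORs in the Htable entry picked by the next nibble
 * of the Xi copy at (%esp).
 */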
jmp .L000x86_loop
.align 16
.L000x86_loop:
movb %bl,%al
shrdl $4,%ecx,%ebx
andb $15,%al
shrdl $4,%edx,%ecx
shrdl $4,%ebp,%edx
shrl $4,%ebp
xorl 16(%esp,%eax,4),%ebp
movb (%esp,%edi,1),%al
andb $240,%al
xorl 8(%esi,%eax,1),%ebx
xorl 12(%esi,%eax,1),%ecx
xorl (%esi,%eax,1),%edx
xorl 4(%esi,%eax,1),%ebp
decl %edi
js .L001x86_break
movb %bl,%al
shrdl $4,%ecx,%ebx
andb $15,%al
shrdl $4,%edx,%ecx
shrdl $4,%ebp,%edx
shrl $4,%ebp
xorl 16(%esp,%eax,4),%ebp
movb (%esp,%edi,1),%al
shlb $4,%al
xorl 8(%esi,%eax,1),%ebx
xorl 12(%esi,%eax,1),%ecx
xorl (%esi,%eax,1),%edx
xorl 4(%esi,%eax,1),%ebp
jmp .L000x86_loop
.align 16
.L001x86_break:
bswap %ebx
bswap %ecx
bswap %edx
bswap %ebp
movl 104(%esp),%edi
movl %ebx,12(%edi)
movl %ecx,8(%edi)
movl %edx,4(%edi)
movl %ebp,(%edi)
addl $84,%esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size gcm_gmult_4bit_x86,.-.L_gcm_gmult_4bit_x86_begin
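/*
 * gcm_ghash_4bit_x86(Xi, Htable, inp, len): hash len bytes of input.
 * 116(%esp) is rewritten as an end-of-input pointer; each 16-byte
 * block is XORed into Xi, which is then multiplied by H with the same
 * stack rem_4bit table and nibble loop as gcm_gmult_4bit_x86 above.
 */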
.globl gcm_ghash_4bit_x86
.type gcm_ghash_4bit_x86,@function
.align 16
gcm_ghash_4bit_x86:
.L_gcm_ghash_4bit_x86_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
subl $84,%esp
movl 104(%esp),%ebx
movl 108(%esp),%esi
movl 112(%esp),%edi
movl 116(%esp),%ecx
addl %edi,%ecx
movl %ecx,116(%esp)
movl (%ebx),%ebp
movl 4(%ebx),%edx
movl 8(%ebx),%ecx
movl 12(%ebx),%ebx
movl $0,16(%esp)
movl $471859200,20(%esp)
movl $943718400,24(%esp)
movl $610271232,28(%esp)
movl $1887436800,32(%esp)
movl $1822425088,36(%esp)
movl $1220542464,40(%esp)
movl $1423966208,44(%esp)
movl $3774873600,48(%esp)
movl $4246732800,52(%esp)
movl $3644850176,56(%esp)
movl $3311403008,60(%esp)
movl $2441084928,64(%esp)
movl $2376073216,68(%esp)
movl $2847932416,72(%esp)
movl $3051356160,76(%esp)
.align 16
.L002x86_outer_loop:
xorl 12(%edi),%ebx
xorl 8(%edi),%ecx
xorl 4(%edi),%edx
xorl (%edi),%ebp
movl %ebx,12(%esp)
movl %ecx,8(%esp)
movl %edx,4(%esp)
movl %ebp,(%esp)
shrl $20,%ebx
andl $240,%ebx
movl 4(%esi,%ebx,1),%ebp
movl (%esi,%ebx,1),%edx
movl 12(%esi,%ebx,1),%ecx
movl 8(%esi,%ebx,1),%ebx
xorl %eax,%eax
movl $15,%edi
jmp .L003x86_loop
.align 16
.L003x86_loop:
movb %bl,%al
shrdl $4,%ecx,%ebx
andb $15,%al
shrdl $4,%edx,%ecx
shrdl $4,%ebp,%edx
shrl $4,%ebp
xorl 16(%esp,%eax,4),%ebp
movb (%esp,%edi,1),%al
andb $240,%al
xorl 8(%esi,%eax,1),%ebx
xorl 12(%esi,%eax,1),%ecx
xorl (%esi,%eax,1),%edx
xorl 4(%esi,%eax,1),%ebp
decl %edi
js .L004x86_break
movb %bl,%al
shrdl $4,%ecx,%ebx
andb $15,%al
shrdl $4,%edx,%ecx
shrdl $4,%ebp,%edx
shrl $4,%ebp
xorl 16(%esp,%eax,4),%ebp
movb (%esp,%edi,1),%al
shlb $4,%al
xorl 8(%esi,%eax,1),%ebx
xorl 12(%esi,%eax,1),%ecx
xorl (%esi,%eax,1),%edx
xorl 4(%esi,%eax,1),%ebp
jmp .L003x86_loop
.align 16
.L004x86_break:
bswap %ebx
bswap %ecx
bswap %edx
bswap %ebp
movl 112(%esp),%edi
leal 16(%edi),%edi
cmpl 116(%esp),%edi
movl %edi,112(%esp)
jb .L002x86_outer_loop
movl 104(%esp),%edi
movl %ebx,12(%edi)
movl %ecx,8(%edi)
movl %edx,4(%edi)
movl %ebp,(%edi)
addl $84,%esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size gcm_ghash_4bit_x86,.-.L_gcm_ghash_4bit_x86_begin
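/*
 * gcm_gmult_4bit_mmx: the same Xi = Xi * H computation with the
 * 128-bit values held as two 64-bit MMX halves (%mm0/%mm1). The
 * call/pop below is the standard 32-bit PIC idiom for obtaining the
 * address of the .Lrem_4bit table at the end of this file.
 */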
.globl gcm_gmult_4bit_mmx
.type gcm_gmult_4bit_mmx,@function
.align 16
gcm_gmult_4bit_mmx:
.L_gcm_gmult_4bit_mmx_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%edi
movl 24(%esp),%esi
call .L005pic_point
.L005pic_point:
popl %eax
leal .Lrem_4bit-.L005pic_point(%eax),%eax
movzbl 15(%edi),%ebx
xorl %ecx,%ecx
movl %ebx,%edx
movb %dl,%cl
movl $14,%ebp
shlb $4,%cl
andl $240,%edx
movq 8(%esi,%ecx,1),%mm0
movq (%esi,%ecx,1),%mm1
movd %mm0,%ebx
jmp .L006mmx_loop
.align 16
.L006mmx_loop:
psrlq $4,%mm0
andl $15,%ebx
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%edx,1),%mm0
movb (%edi,%ebp,1),%cl
psllq $60,%mm2
pxor (%eax,%ebx,8),%mm1
decl %ebp
movd %mm0,%ebx
pxor (%esi,%edx,1),%mm1
movl %ecx,%edx
pxor %mm2,%mm0
js .L007mmx_break
shlb $4,%cl
andl $15,%ebx
psrlq $4,%mm0
andl $240,%edx
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%ecx,1),%mm0
psllq $60,%mm2
pxor (%eax,%ebx,8),%mm1
movd %mm0,%ebx
pxor (%esi,%ecx,1),%mm1
pxor %mm2,%mm0
jmp .L006mmx_loop
.align 16
.L007mmx_break:
shlb $4,%cl
andl $15,%ebx
psrlq $4,%mm0
andl $240,%edx
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%ecx,1),%mm0
psllq $60,%mm2
pxor (%eax,%ebx,8),%mm1
movd %mm0,%ebx
pxor (%esi,%ecx,1),%mm1
pxor %mm2,%mm0
psrlq $4,%mm0
andl $15,%ebx
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%edx,1),%mm0
psllq $60,%mm2
pxor (%eax,%ebx,8),%mm1
movd %mm0,%ebx
pxor (%esi,%edx,1),%mm1
pxor %mm2,%mm0
psrlq $32,%mm0
movd %mm1,%edx
psrlq $32,%mm1
movd %mm0,%ecx
movd %mm1,%ebp
bswap %ebx
bswap %edx
bswap %ecx
bswap %ebp
emms
movl %ebx,12(%edi)
movl %edx,4(%edi)
movl %ecx,8(%edi)
movl %ebp,(%edi)
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size gcm_gmult_4bit_mmx,.-.L_gcm_gmult_4bit_mmx_begin
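/*
 * gcm_ghash_4bit_mmx(Xi, Htable, inp, len): bulk GHASH processing a
 * byte (two nibbles) of Xi per inner step. The prologue aligns %esp
 * to 64 bytes (the original %esp is saved at 556(%esp) and restored
 * at the end) and expands Htable into four interleaved stack tables
 * at 16, 144, 272 and 400(%esp), the latter two pre-shifted by 4
 * bits; it also caches one byte per table entry at (%esp)..15(%esp)
 * for the remainder bookkeeping.
 */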
.globl gcm_ghash_4bit_mmx
.type gcm_ghash_4bit_mmx,@function
.align 16
gcm_ghash_4bit_mmx:
.L_gcm_ghash_4bit_mmx_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%eax
movl 24(%esp),%ebx
movl 28(%esp),%ecx
movl 32(%esp),%edx
movl %esp,%ebp
call .L008pic_point
.L008pic_point:
popl %esi
leal .Lrem_8bit-.L008pic_point(%esi),%esi
subl $544,%esp
andl $-64,%esp
subl $16,%esp
addl %ecx,%edx
movl %eax,544(%esp)
movl %edx,552(%esp)
movl %ebp,556(%esp)
addl $128,%ebx
leal 144(%esp),%edi
leal 400(%esp),%ebp
movl -120(%ebx),%edx
movq -120(%ebx),%mm0
movq -128(%ebx),%mm3
shll $4,%edx
movb %dl,(%esp)
movl -104(%ebx),%edx
movq -104(%ebx),%mm2
movq -112(%ebx),%mm5
movq %mm0,-128(%edi)
psrlq $4,%mm0
movq %mm3,(%edi)
movq %mm3,%mm7
psrlq $4,%mm3
shll $4,%edx
movb %dl,1(%esp)
movl -88(%ebx),%edx
movq -88(%ebx),%mm1
psllq $60,%mm7
movq -96(%ebx),%mm4
por %mm7,%mm0
movq %mm2,-120(%edi)
psrlq $4,%mm2
movq %mm5,8(%edi)
movq %mm5,%mm6
movq %mm0,-128(%ebp)
psrlq $4,%mm5
movq %mm3,(%ebp)
shll $4,%edx
movb %dl,2(%esp)
movl -72(%ebx),%edx
movq -72(%ebx),%mm0
psllq $60,%mm6
movq -80(%ebx),%mm3
por %mm6,%mm2
movq %mm1,-112(%edi)
psrlq $4,%mm1
movq %mm4,16(%edi)
movq %mm4,%mm7
movq %mm2,-120(%ebp)
psrlq $4,%mm4
movq %mm5,8(%ebp)
shll $4,%edx
movb %dl,3(%esp)
movl -56(%ebx),%edx
movq -56(%ebx),%mm2
psllq $60,%mm7
movq -64(%ebx),%mm5
por %mm7,%mm1
movq %mm0,-104(%edi)
psrlq $4,%mm0
movq %mm3,24(%edi)
movq %mm3,%mm6
movq %mm1,-112(%ebp)
psrlq $4,%mm3
movq %mm4,16(%ebp)
shll $4,%edx
movb %dl,4(%esp)
movl -40(%ebx),%edx
movq -40(%ebx),%mm1
psllq $60,%mm6
movq -48(%ebx),%mm4
por %mm6,%mm0
movq %mm2,-96(%edi)
psrlq $4,%mm2
movq %mm5,32(%edi)
movq %mm5,%mm7
movq %mm0,-104(%ebp)
psrlq $4,%mm5
movq %mm3,24(%ebp)
shll $4,%edx
movb %dl,5(%esp)
movl -24(%ebx),%edx
movq -24(%ebx),%mm0
psllq $60,%mm7
movq -32(%ebx),%mm3
por %mm7,%mm2
movq %mm1,-88(%edi)
psrlq $4,%mm1
movq %mm4,40(%edi)
movq %mm4,%mm6
movq %mm2,-96(%ebp)
psrlq $4,%mm4
movq %mm5,32(%ebp)
shll $4,%edx
movb %dl,6(%esp)
movl -8(%ebx),%edx
movq -8(%ebx),%mm2
psllq $60,%mm6
movq -16(%ebx),%mm5
por %mm6,%mm1
movq %mm0,-80(%edi)
psrlq $4,%mm0
movq %mm3,48(%edi)
movq %mm3,%mm7
movq %mm1,-88(%ebp)
psrlq $4,%mm3
movq %mm4,40(%ebp)
shll $4,%edx
movb %dl,7(%esp)
movl 8(%ebx),%edx
movq 8(%ebx),%mm1
psllq $60,%mm7
movq (%ebx),%mm4
por %mm7,%mm0
movq %mm2,-72(%edi)
psrlq $4,%mm2
movq %mm5,56(%edi)
movq %mm5,%mm6
movq %mm0,-80(%ebp)
psrlq $4,%mm5
movq %mm3,48(%ebp)
shll $4,%edx
movb %dl,8(%esp)
movl 24(%ebx),%edx
movq 24(%ebx),%mm0
psllq $60,%mm6
movq 16(%ebx),%mm3
por %mm6,%mm2
movq %mm1,-64(%edi)
psrlq $4,%mm1
movq %mm4,64(%edi)
movq %mm4,%mm7
movq %mm2,-72(%ebp)
psrlq $4,%mm4
movq %mm5,56(%ebp)
shll $4,%edx
movb %dl,9(%esp)
movl 40(%ebx),%edx
movq 40(%ebx),%mm2
psllq $60,%mm7
movq 32(%ebx),%mm5
por %mm7,%mm1
movq %mm0,-56(%edi)
psrlq $4,%mm0
movq %mm3,72(%edi)
movq %mm3,%mm6
movq %mm1,-64(%ebp)
psrlq $4,%mm3
movq %mm4,64(%ebp)
shll $4,%edx
movb %dl,10(%esp)
movl 56(%ebx),%edx
movq 56(%ebx),%mm1
psllq $60,%mm6
movq 48(%ebx),%mm4
por %mm6,%mm0
movq %mm2,-48(%edi)
psrlq $4,%mm2
movq %mm5,80(%edi)
movq %mm5,%mm7
movq %mm0,-56(%ebp)
psrlq $4,%mm5
movq %mm3,72(%ebp)
shll $4,%edx
movb %dl,11(%esp)
movl 72(%ebx),%edx
movq 72(%ebx),%mm0
psllq $60,%mm7
movq 64(%ebx),%mm3
por %mm7,%mm2
movq %mm1,-40(%edi)
psrlq $4,%mm1
movq %mm4,88(%edi)
movq %mm4,%mm6
movq %mm2,-48(%ebp)
psrlq $4,%mm4
movq %mm5,80(%ebp)
shll $4,%edx
movb %dl,12(%esp)
movl 88(%ebx),%edx
movq 88(%ebx),%mm2
psllq $60,%mm6
movq 80(%ebx),%mm5
por %mm6,%mm1
movq %mm0,-32(%edi)
psrlq $4,%mm0
movq %mm3,96(%edi)
movq %mm3,%mm7
movq %mm1,-40(%ebp)
psrlq $4,%mm3
movq %mm4,88(%ebp)
shll $4,%edx
movb %dl,13(%esp)
movl 104(%ebx),%edx
movq 104(%ebx),%mm1
psllq $60,%mm7
movq 96(%ebx),%mm4
por %mm7,%mm0
movq %mm2,-24(%edi)
psrlq $4,%mm2
movq %mm5,104(%edi)
movq %mm5,%mm6
movq %mm0,-32(%ebp)
psrlq $4,%mm5
movq %mm3,96(%ebp)
shll $4,%edx
movb %dl,14(%esp)
movl 120(%ebx),%edx
movq 120(%ebx),%mm0
psllq $60,%mm6
movq 112(%ebx),%mm3
por %mm6,%mm2
movq %mm1,-16(%edi)
psrlq $4,%mm1
movq %mm4,112(%edi)
movq %mm4,%mm7
movq %mm2,-24(%ebp)
psrlq $4,%mm4
movq %mm5,104(%ebp)
shll $4,%edx
movb %dl,15(%esp)
psllq $60,%mm7
por %mm7,%mm1
movq %mm0,-8(%edi)
psrlq $4,%mm0
movq %mm3,120(%edi)
movq %mm3,%mm6
movq %mm1,-16(%ebp)
psrlq $4,%mm3
movq %mm4,112(%ebp)
psllq $60,%mm6
por %mm6,%mm0
movq %mm0,-8(%ebp)
movq %mm3,120(%ebp)
movq (%eax),%mm6
movl 8(%eax),%ebx
movl 12(%eax),%edx
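/*
 * Outer loop: one 16-byte block per iteration. Xi (in %mm6 plus
 * %ebx/%edx) is XORed with the block, then multiplied by H a byte at
 * a time; pinsrw pulls 16-bit reduction values from .Lrem_8bit, and
 * the closing psllw/psrlw/pshufw sequence byte-swaps the result. The
 * loop ends when the input pointer reaches the limit at 552(%esp).
 */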
.align 16
.L009outer:
xorl 12(%ecx),%edx
xorl 8(%ecx),%ebx
pxor (%ecx),%mm6
leal 16(%ecx),%ecx
movl %ebx,536(%esp)
movq %mm6,528(%esp)
movl %ecx,548(%esp)
xorl %eax,%eax
roll $8,%edx
movb %dl,%al
movl %eax,%ebp
andb $15,%al
shrl $4,%ebp
pxor %mm0,%mm0
roll $8,%edx
pxor %mm1,%mm1
pxor %mm2,%mm2
movq 16(%esp,%eax,8),%mm7
movq 144(%esp,%eax,8),%mm6
movb %dl,%al
movd %mm7,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
shrl $4,%edi
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
xorb (%esp,%ebp,1),%bl
movb %dl,%al
movd %mm7,%ecx
movzbl %bl,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%ebp
psrlq $8,%mm6
pxor 272(%esp,%edi,8),%mm7
andb $15,%al
psllq $56,%mm3
shrl $4,%ebp
pinsrw $2,(%esi,%ebx,2),%mm2
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%edi,8),%mm6
xorb (%esp,%edi,1),%cl
movb %dl,%al
movl 536(%esp),%edx
movd %mm7,%ebx
movzbl %cl,%ecx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm2,%mm6
shrl $4,%edi
pinsrw $2,(%esi,%ecx,2),%mm1
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
xorb (%esp,%ebp,1),%bl
movb %dl,%al
movd %mm7,%ecx
movzbl %bl,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%ebp
psrlq $8,%mm6
pxor 272(%esp,%edi,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm1,%mm6
shrl $4,%ebp
pinsrw $2,(%esi,%ebx,2),%mm0
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%edi,8),%mm6
xorb (%esp,%edi,1),%cl
movb %dl,%al
movd %mm7,%ebx
movzbl %cl,%ecx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm0,%mm6
shrl $4,%edi
pinsrw $2,(%esi,%ecx,2),%mm2
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
xorb (%esp,%ebp,1),%bl
movb %dl,%al
movd %mm7,%ecx
movzbl %bl,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%ebp
psrlq $8,%mm6
pxor 272(%esp,%edi,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm2,%mm6
shrl $4,%ebp
pinsrw $2,(%esi,%ebx,2),%mm1
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%edi,8),%mm6
xorb (%esp,%edi,1),%cl
movb %dl,%al
movl 532(%esp),%edx
movd %mm7,%ebx
movzbl %cl,%ecx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm1,%mm6
shrl $4,%edi
pinsrw $2,(%esi,%ecx,2),%mm0
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
xorb (%esp,%ebp,1),%bl
movb %dl,%al
movd %mm7,%ecx
movzbl %bl,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%ebp
psrlq $8,%mm6
pxor 272(%esp,%edi,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm0,%mm6
shrl $4,%ebp
pinsrw $2,(%esi,%ebx,2),%mm2
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%edi,8),%mm6
xorb (%esp,%edi,1),%cl
movb %dl,%al
movd %mm7,%ebx
movzbl %cl,%ecx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm2,%mm6
shrl $4,%edi
pinsrw $2,(%esi,%ecx,2),%mm1
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
xorb (%esp,%ebp,1),%bl
movb %dl,%al
movd %mm7,%ecx
movzbl %bl,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%ebp
psrlq $8,%mm6
pxor 272(%esp,%edi,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm1,%mm6
shrl $4,%ebp
pinsrw $2,(%esi,%ebx,2),%mm0
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%edi,8),%mm6
xorb (%esp,%edi,1),%cl
movb %dl,%al
movl 528(%esp),%edx
movd %mm7,%ebx
movzbl %cl,%ecx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm0,%mm6
shrl $4,%edi
pinsrw $2,(%esi,%ecx,2),%mm2
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
xorb (%esp,%ebp,1),%bl
movb %dl,%al
movd %mm7,%ecx
movzbl %bl,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%ebp
psrlq $8,%mm6
pxor 272(%esp,%edi,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm2,%mm6
shrl $4,%ebp
pinsrw $2,(%esi,%ebx,2),%mm1
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%edi,8),%mm6
xorb (%esp,%edi,1),%cl
movb %dl,%al
movd %mm7,%ebx
movzbl %cl,%ecx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm1,%mm6
shrl $4,%edi
pinsrw $2,(%esi,%ecx,2),%mm0
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
xorb (%esp,%ebp,1),%bl
movb %dl,%al
movd %mm7,%ecx
movzbl %bl,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%ebp
psrlq $8,%mm6
pxor 272(%esp,%edi,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm0,%mm6
shrl $4,%ebp
pinsrw $2,(%esi,%ebx,2),%mm2
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%edi,8),%mm6
xorb (%esp,%edi,1),%cl
movb %dl,%al
movl 524(%esp),%edx
movd %mm7,%ebx
movzbl %cl,%ecx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm2,%mm6
shrl $4,%edi
pinsrw $2,(%esi,%ecx,2),%mm1
pxor 16(%esp,%eax,8),%mm7
pxor 144(%esp,%eax,8),%mm6
xorb (%esp,%ebp,1),%bl
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
movzbl %bl,%ebx
pxor %mm2,%mm2
psllq $4,%mm1
movd %mm7,%ecx
psrlq $4,%mm7
movq %mm6,%mm3
psrlq $4,%mm6
shll $4,%ecx
pxor 16(%esp,%edi,8),%mm7
psllq $60,%mm3
movzbl %cl,%ecx
pxor %mm3,%mm7
pxor 144(%esp,%edi,8),%mm6
pinsrw $2,(%esi,%ebx,2),%mm0
pxor %mm1,%mm6
movd %mm7,%edx
pinsrw $3,(%esi,%ecx,2),%mm2
psllq $12,%mm0
pxor %mm0,%mm6
psrlq $32,%mm7
pxor %mm2,%mm6
movl 548(%esp),%ecx
movd %mm7,%ebx
movq %mm6,%mm3
psllw $8,%mm6
psrlw $8,%mm3
por %mm3,%mm6
bswap %edx
pshufw $27,%mm6,%mm6
bswap %ebx
cmpl 552(%esp),%ecx
jne .L009outer
movl 544(%esp),%eax
movl %edx,12(%eax)
movl %ebx,8(%eax)
movq %mm6,(%eax)
movl 556(%esp),%esp
emms
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size gcm_ghash_4bit_mmx,.-.L_gcm_ghash_4bit_mmx_begin
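/*
 * gcm_init_clmul(Htable, H): PCLMULQDQ setup. The two 64-bit halves
 * of H are swapped (pshufd $78), H is multiplied by x -- a left shift
 * by 1 with a conditional XOR of the 0xc2...01 polynomial constant
 * kept at .Lbswap+16 -- and H^2 is then computed with carry-less
 * multiplies. H, H^2 and their XORed (Karatsuba) halves are stored
 * at 0, 16 and 32(%edx).
 */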
.globl gcm_init_clmul
.type gcm_init_clmul,@function
.align 16
gcm_init_clmul:
.L_gcm_init_clmul_begin:
movl 4(%esp),%edx
movl 8(%esp),%eax
call .L010pic
.L010pic:
popl %ecx
leal .Lbswap-.L010pic(%ecx),%ecx
movdqu (%eax),%xmm2
pshufd $78,%xmm2,%xmm2
pshufd $255,%xmm2,%xmm4
movdqa %xmm2,%xmm3
psllq $1,%xmm2
pxor %xmm5,%xmm5
psrlq $63,%xmm3
pcmpgtd %xmm4,%xmm5
pslldq $8,%xmm3
por %xmm3,%xmm2
pand 16(%ecx),%xmm5
pxor %xmm5,%xmm2
movdqa %xmm2,%xmm0
movdqa %xmm0,%xmm1
pshufd $78,%xmm0,%xmm3
pshufd $78,%xmm2,%xmm4
pxor %xmm0,%xmm3
pxor %xmm2,%xmm4
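/*
 * The .byte sequences below are hand-encoded SSE instructions for
 * assemblers without PCLMULQDQ support:
 *   102,15,58,68,194,0  = pclmulqdq $0x00,%xmm2,%xmm0 (low halves)
 *   102,15,58,68,202,17 = pclmulqdq $0x11,%xmm2,%xmm1 (high halves)
 *   102,15,58,68,220,0  = pclmulqdq $0x00,%xmm4,%xmm3 (Karatsuba middle)
 * Likewise 102,15,56,0,x encodes pshufb and 102,15,58,15,227,8
 * encodes palignr $8,%xmm3,%xmm4 elsewhere in this file.
 */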
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,220,0
xorps %xmm0,%xmm3
xorps %xmm1,%xmm3
movdqa %xmm3,%xmm4
psrldq $8,%xmm3
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
pshufd $78,%xmm2,%xmm3
pshufd $78,%xmm0,%xmm4
pxor %xmm2,%xmm3
movdqu %xmm2,(%edx)
pxor %xmm0,%xmm4
movdqu %xmm0,16(%edx)
.byte 102,15,58,15,227,8
movdqu %xmm4,32(%edx)
ret
.size gcm_init_clmul,.-.L_gcm_init_clmul_begin
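/*
 * gcm_gmult_clmul(Xi, Htable): single-block Xi = Xi * H. Xi is
 * byte-swapped with the .Lbswap mask (the pshufb encoded as
 * 102,15,56,0,197), multiplied against Htable[0] with one Karatsuba
 * carry-less multiply, reduced modulo the GHASH polynomial by the
 * psllq 5/1/57, psrlq 1/5/1 shift sequence, and swapped back.
 */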
.globl gcm_gmult_clmul
.type gcm_gmult_clmul,@function
.align 16
gcm_gmult_clmul:
.L_gcm_gmult_clmul_begin:
movl 4(%esp),%eax
movl 8(%esp),%edx
call .L011pic
.L011pic:
popl %ecx
leal .Lbswap-.L011pic(%ecx),%ecx
movdqu (%eax),%xmm0
movdqa (%ecx),%xmm5
movups (%edx),%xmm2
.byte 102,15,56,0,197
movups 32(%edx),%xmm4
movdqa %xmm0,%xmm1
pshufd $78,%xmm0,%xmm3
pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,220,0
xorps %xmm0,%xmm3
xorps %xmm1,%xmm3
movdqa %xmm3,%xmm4
psrldq $8,%xmm3
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
.byte 102,15,56,0,197
movdqu %xmm0,(%eax)
ret
.size gcm_gmult_clmul,.-.L_gcm_gmult_clmul_begin
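/*
 * gcm_ghash_clmul(Xi, Htable, inp, len): bulk PCLMULQDQ path. When at
 * least two blocks remain, .L015mod_loop multiplies a pair per
 * iteration using H and H^2 (with the precomputed XORed halves at
 * 32(%edx) as the $16-immediate pclmulqdq operand), interleaving the
 * reduction of one product with the multiplication of the next pair;
 * .L014even_tail and .L013odd_tail handle the leftovers.
 */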
.globl gcm_ghash_clmul
.type gcm_ghash_clmul,@function
.align 16
gcm_ghash_clmul:
.L_gcm_ghash_clmul_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%eax
movl 24(%esp),%edx
movl 28(%esp),%esi
movl 32(%esp),%ebx
call .L012pic
.L012pic:
popl %ecx
leal .Lbswap-.L012pic(%ecx),%ecx
movdqu (%eax),%xmm0
movdqa (%ecx),%xmm5
movdqu (%edx),%xmm2
.byte 102,15,56,0,197
subl $16,%ebx
jz .L013odd_tail
movdqu (%esi),%xmm3
movdqu 16(%esi),%xmm6
.byte 102,15,56,0,221
.byte 102,15,56,0,245
movdqu 32(%edx),%xmm5
pxor %xmm3,%xmm0
pshufd $78,%xmm6,%xmm3
movdqa %xmm6,%xmm7
pxor %xmm6,%xmm3
leal 32(%esi),%esi
.byte 102,15,58,68,242,0
.byte 102,15,58,68,250,17
.byte 102,15,58,68,221,0
movups 16(%edx),%xmm2
nop
subl $32,%ebx
jbe .L014even_tail
jmp .L015mod_loop
.align 32
.L015mod_loop:
pshufd $78,%xmm0,%xmm4
movdqa %xmm0,%xmm1
pxor %xmm0,%xmm4
nop
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,229,16
movups (%edx),%xmm2
xorps %xmm6,%xmm0
movdqa (%ecx),%xmm5
xorps %xmm7,%xmm1
movdqu (%esi),%xmm7
pxor %xmm0,%xmm3
movdqu 16(%esi),%xmm6
pxor %xmm1,%xmm3
.byte 102,15,56,0,253
pxor %xmm3,%xmm4
movdqa %xmm4,%xmm3
psrldq $8,%xmm4
pslldq $8,%xmm3
pxor %xmm4,%xmm1
pxor %xmm3,%xmm0
.byte 102,15,56,0,245
pxor %xmm7,%xmm1
movdqa %xmm6,%xmm7
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
.byte 102,15,58,68,242,0
movups 32(%edx),%xmm5
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
pshufd $78,%xmm7,%xmm3
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm7,%xmm3
pxor %xmm4,%xmm1
.byte 102,15,58,68,250,17
movups 16(%edx),%xmm2
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
.byte 102,15,58,68,221,0
leal 32(%esi),%esi
subl $32,%ebx
ja .L015mod_loop
.L014even_tail:
pshufd $78,%xmm0,%xmm4
movdqa %xmm0,%xmm1
pxor %xmm0,%xmm4
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,229,16
movdqa (%ecx),%xmm5
xorps %xmm6,%xmm0
xorps %xmm7,%xmm1
pxor %xmm0,%xmm3
pxor %xmm1,%xmm3
pxor %xmm3,%xmm4
movdqa %xmm4,%xmm3
psrldq $8,%xmm4
pslldq $8,%xmm3
pxor %xmm4,%xmm1
pxor %xmm3,%xmm0
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
testl %ebx,%ebx
jnz .L016done
movups (%edx),%xmm2
.L013odd_tail:
movdqu (%esi),%xmm3
.byte 102,15,56,0,221
pxor %xmm3,%xmm0
movdqa %xmm0,%xmm1
pshufd $78,%xmm0,%xmm3
pshufd $78,%xmm2,%xmm4
pxor %xmm0,%xmm3
pxor %xmm2,%xmm4
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,220,0
xorps %xmm0,%xmm3
xorps %xmm1,%xmm3
movdqa %xmm3,%xmm4
psrldq $8,%xmm3
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
.L016done:
.byte 102,15,56,0,197
movdqu %xmm0,(%eax)
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size gcm_ghash_clmul,.-.L_gcm_ghash_clmul_begin
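/*
 * Constant data. .Lbswap is the 16-byte pshufb mask that reverses
 * byte order; the following 16 bytes (1,0,...,0,194) are the GHASH
 * reduction constant with 0xc2 in the top byte, used by
 * gcm_init_clmul via pand 16(%ecx).
 */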
.align 64
.Lbswap:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
.align 64
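/*
 * .Lrem_8bit: 256 16-bit entries of precomputed reduction values,
 * indexed by the byte shifted out of the low end of the product in
 * gcm_ghash_4bit_mmx (the pinsrw lookups above).
 */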
.Lrem_8bit:
.value 0,450,900,582,1800,1738,1164,1358
.value 3600,4050,3476,3158,2328,2266,2716,2910
.value 7200,7650,8100,7782,6952,6890,6316,6510
.value 4656,5106,4532,4214,5432,5370,5820,6014
.value 14400,14722,15300,14854,16200,16010,15564,15630
.value 13904,14226,13780,13334,12632,12442,13020,13086
.value 9312,9634,10212,9766,9064,8874,8428,8494
.value 10864,11186,10740,10294,11640,11450,12028,12094
.value 28800,28994,29444,29382,30600,30282,29708,30158
.value 32400,32594,32020,31958,31128,30810,31260,31710
.value 27808,28002,28452,28390,27560,27242,26668,27118
.value 25264,25458,24884,24822,26040,25722,26172,26622
.value 18624,18690,19268,19078,20424,19978,19532,19854
.value 18128,18194,17748,17558,16856,16410,16988,17310
.value 21728,21794,22372,22182,21480,21034,20588,20910
.value 23280,23346,22900,22710,24056,23610,24188,24510
.value 57600,57538,57988,58182,58888,59338,58764,58446
.value 61200,61138,60564,60758,59416,59866,60316,59998
.value 64800,64738,65188,65382,64040,64490,63916,63598
.value 62256,62194,61620,61814,62520,62970,63420,63102
.value 55616,55426,56004,56070,56904,57226,56780,56334
.value 55120,54930,54484,54550,53336,53658,54236,53790
.value 50528,50338,50916,50982,49768,50090,49644,49198
.value 52080,51890,51444,51510,52344,52666,53244,52798
.value 37248,36930,37380,37830,38536,38730,38156,38094
.value 40848,40530,39956,40406,39064,39258,39708,39646
.value 36256,35938,36388,36838,35496,35690,35116,35054
.value 33712,33394,32820,33270,33976,34170,34620,34558
.value 43456,43010,43588,43910,44744,44810,44364,44174
.value 42960,42514,42068,42390,41176,41242,41820,41630
.value 46560,46114,46692,47014,45800,45866,45420,45230
.value 48112,47666,47220,47542,48376,48442,49020,48830
.align 64
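/*
 * .Lrem_4bit: 16 64-bit entries; each pair of .long values is one
 * little-endian quadword with the 16-bit remainder constant in its
 * top bits (e.g. entry 1 = 0x1C20 << 48), matching the word-sized
 * table the non-MMX functions build on the stack.
 */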
.Lrem_4bit:
.long 0,0,0,471859200,0,943718400,0,610271232
.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160
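/* Trailing .byte data: the NUL-terminated ASCII string
   "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>". */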
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
.byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
.byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
.byte 0
#endif