OLD | NEW |
(Empty) | |
| 1 .file "ghash-x86.s" |
| 2 .text |
| 3 .globl _gcm_gmult_4bit_x86 |
# _gcm_gmult_4bit_x86 -- integer-register (no MMX) routine taking two 32-bit
# stack arguments:
#   arg0: pointer to a 16-byte block; read on entry, overwritten with the result.
#   arg1: base of a table that is read in 16-byte entries at offset nibble*16.
# Every nibble of the block selects one table entry that is xor-ed into a
# 128-bit accumulator held in %ebp:%edx:%ecx:%ebx. Between lookups the
# accumulator is shifted right by 4 bits and the 4 bits shifted out select a
# correction word from a 16-entry table built on the stack.
# Saves/restores %ebp, %ebx, %esi, %edi; %eax, %ecx, %edx and flags are clobbered.
# NOTE(review): presumably the GHASH multiply with arg0 = Xi and arg1 = Htable
# (the identification string at the end of the file reads "GHASH for x86") --
# confirm against the C prototype.
| 4 .align 4 |
| 5 _gcm_gmult_4bit_x86: |
| 6 L_gcm_gmult_4bit_x86_begin: |
| 7 pushl %ebp |
| 8 pushl %ebx |
| 9 pushl %esi |
| 10 pushl %edi |
# Frame: 0..15(%esp) = working copy of the block, 16..79(%esp) = 16 dword
# correction table. With 16 bytes of saved registers plus these 84 bytes the
# return address sits at 100(%esp), so arg0 is at 104(%esp) and arg1 at 108(%esp).
| 11 subl $84,%esp |
| 12 movl 104(%esp),%edi |
| 13 movl 108(%esp),%esi |
# Load the 16-byte block as four little-endian dwords.
| 14 movl (%edi),%ebp |
| 15 movl 4(%edi),%edx |
| 16 movl 8(%edi),%ecx |
| 17 movl 12(%edi),%ebx |
# Build the correction table; entry i is read below as 16(%esp,i,4).
# Each constant equals the second dword of the matching Lrem_4bit entry times 16.
| 18 movl $0,16(%esp) |
| 19 movl $471859200,20(%esp) |
| 20 movl $943718400,24(%esp) |
| 21 movl $610271232,28(%esp) |
| 22 movl $1887436800,32(%esp) |
| 23 movl $1822425088,36(%esp) |
| 24 movl $1220542464,40(%esp) |
| 25 movl $1423966208,44(%esp) |
| 26 movl $3774873600,48(%esp) |
| 27 movl $4246732800,52(%esp) |
| 28 movl $3644850176,56(%esp) |
| 29 movl $3311403008,60(%esp) |
| 30 movl $2441084928,64(%esp) |
| 31 movl $2376073216,68(%esp) |
| 32 movl $2847932416,72(%esp) |
| 33 movl $3051356160,76(%esp) |
# Keep a byte-addressable copy of the block at 0..15(%esp) for the loop.
| 34 movl %ebp,(%esp) |
| 35 movl %edx,4(%esp) |
| 36 movl %ecx,8(%esp) |
| 37 movl %ebx,12(%esp) |
# (dword at offset 12 >> 20) & 0xF0 = (low nibble of byte 15) * 16,
# i.e. the byte offset of the first table entry.
| 38 shrl $20,%ebx |
| 39 andl $240,%ebx |
# Seed the accumulator with that entry: dwords +4,+0,+12,+8 -> ebp,edx,ecx,ebx.
| 40 movl 4(%esi,%ebx,1),%ebp |
| 41 movl (%esi,%ebx,1),%edx |
| 42 movl 12(%esi,%ebx,1),%ecx |
| 43 movl 8(%esi,%ebx,1),%ebx |
# eax is zeroed once so that the byte writes to %al below give a zero-extended
# index. edi = index of the block byte currently being consumed (15 down to 0).
| 44 xorl %eax,%eax |
| 45 movl $15,%edi |
| 46 jmp L000x86_loop |
| 47 .align 4,0x90 |
| 48 L000x86_loop: |
# First half: shift the accumulator right by 4 (al keeps the 4 bits that fall
# out of ebx) and xor the matching correction word into the top dword.
| 49 movb %bl,%al |
| 50 shrdl $4,%ecx,%ebx |
| 51 andb $15,%al |
| 52 shrdl $4,%edx,%ecx |
| 53 shrdl $4,%ebp,%edx |
| 54 shrl $4,%ebp |
| 55 xorl 16(%esp,%eax,4),%ebp |
# High nibble of byte [edi], left in place (= nibble * 16), selects the entry.
| 56 movb (%esp,%edi,1),%al |
| 57 andb $240,%al |
| 58 xorl 8(%esi,%eax,1),%ebx |
| 59 xorl 12(%esi,%eax,1),%ecx |
| 60 xorl (%esi,%eax,1),%edx |
| 61 xorl 4(%esi,%eax,1),%ebp |
# Step to the next lower byte; leave once byte 0 has been fully consumed.
| 62 decl %edi |
| 63 js L001x86_break |
# Second half: same shift-and-correct step ...
| 64 movb %bl,%al |
| 65 shrdl $4,%ecx,%ebx |
| 66 andb $15,%al |
| 67 shrdl $4,%edx,%ecx |
| 68 shrdl $4,%ebp,%edx |
| 69 shrl $4,%ebp |
| 70 xorl 16(%esp,%eax,4),%ebp |
# ... then the low nibble of the new byte [edi], scaled by 16 via shlb.
| 71 movb (%esp,%edi,1),%al |
| 72 shlb $4,%al |
| 73 xorl 8(%esi,%eax,1),%ebx |
| 74 xorl 12(%esi,%eax,1),%ecx |
| 75 xorl (%esi,%eax,1),%edx |
| 76 xorl 4(%esi,%eax,1),%ebp |
| 77 jmp L000x86_loop |
| 78 .align 4,0x90 |
| 79 L001x86_break: |
# Byte-swap each accumulator dword and write the 16-byte result back to arg0.
| 80 bswap %ebx |
| 81 bswap %ecx |
| 82 bswap %edx |
| 83 bswap %ebp |
| 84 movl 104(%esp),%edi |
| 85 movl %ebx,12(%edi) |
| 86 movl %ecx,8(%edi) |
| 87 movl %edx,4(%edi) |
| 88 movl %ebp,(%edi) |
| 89 addl $84,%esp |
| 90 popl %edi |
| 91 popl %esi |
| 92 popl %ebx |
| 93 popl %ebp |
| 94 ret |
| 95 .globl _gcm_ghash_4bit_x86 |
# _gcm_ghash_4bit_x86 -- integer-register (no MMX) routine taking four 32-bit
# stack arguments:
#   arg0: pointer to a 16-byte state block; read on entry, overwritten at exit.
#   arg1: base of a table that is read in 16-byte entries at offset nibble*16.
#   arg2: pointer to the input data, consumed 16 bytes per outer iteration.
#   arg3: byte count; arg2 + arg3 is used as the end pointer.
# For each 16-byte input chunk the state is xor-ed with the chunk and then run
# through the same nibble-table loop as _gcm_gmult_4bit_x86.
# The outer loop tests its condition at the bottom, so one chunk is always
# processed; NOTE(review): callers presumably guarantee a non-zero length that
# is a multiple of 16 -- confirm.
# Saves/restores %ebp, %ebx, %esi, %edi; %eax, %ecx, %edx and flags are clobbered.
# The arg2 and arg3 stack slots are overwritten (running pointer / end pointer).
| 96 .align 4 |
| 97 _gcm_ghash_4bit_x86: |
| 98 L_gcm_ghash_4bit_x86_begin: |
| 99 pushl %ebp |
| 100 pushl %ebx |
| 101 pushl %esi |
| 102 pushl %edi |
# Frame: 0..15(%esp) = working copy of the state, 16..79(%esp) = 16 dword
# correction table; arguments are at 104/108/112/116(%esp).
| 103 subl $84,%esp |
| 104 movl 104(%esp),%ebx |
| 105 movl 108(%esp),%esi |
| 106 movl 112(%esp),%edi |
| 107 movl 116(%esp),%ecx |
# Turn the length into an end pointer and keep it in the arg3 slot.
| 108 addl %edi,%ecx |
| 109 movl %ecx,116(%esp) |
# Load the 16-byte state as four little-endian dwords (ebx is loaded last
# because it is also the base pointer).
| 110 movl (%ebx),%ebp |
| 111 movl 4(%ebx),%edx |
| 112 movl 8(%ebx),%ecx |
| 113 movl 12(%ebx),%ebx |
# Build the correction table; entry i is read below as 16(%esp,i,4).
# Each constant equals the second dword of the matching Lrem_4bit entry times 16.
| 114 movl $0,16(%esp) |
| 115 movl $471859200,20(%esp) |
| 116 movl $943718400,24(%esp) |
| 117 movl $610271232,28(%esp) |
| 118 movl $1887436800,32(%esp) |
| 119 movl $1822425088,36(%esp) |
| 120 movl $1220542464,40(%esp) |
| 121 movl $1423966208,44(%esp) |
| 122 movl $3774873600,48(%esp) |
| 123 movl $4246732800,52(%esp) |
| 124 movl $3644850176,56(%esp) |
| 125 movl $3311403008,60(%esp) |
| 126 movl $2441084928,64(%esp) |
| 127 movl $2376073216,68(%esp) |
| 128 movl $2847932416,72(%esp) |
| 129 movl $3051356160,76(%esp) |
| 130 .align 4,0x90 |
| 131 L002x86_outer_loop: |
# xor the next 16 input bytes (edi = input pointer here) into the state.
| 132 xorl 12(%edi),%ebx |
| 133 xorl 8(%edi),%ecx |
| 134 xorl 4(%edi),%edx |
| 135 xorl (%edi),%ebp |
# Keep a byte-addressable copy of the combined block at 0..15(%esp).
| 136 movl %ebx,12(%esp) |
| 137 movl %ecx,8(%esp) |
| 138 movl %edx,4(%esp) |
| 139 movl %ebp,(%esp) |
# (dword at offset 12 >> 20) & 0xF0 = (low nibble of byte 15) * 16.
| 140 shrl $20,%ebx |
| 141 andl $240,%ebx |
# Seed the accumulator with that entry: dwords +4,+0,+12,+8 -> ebp,edx,ecx,ebx.
| 142 movl 4(%esi,%ebx,1),%ebp |
| 143 movl (%esi,%ebx,1),%edx |
| 144 movl 12(%esi,%ebx,1),%ecx |
| 145 movl 8(%esi,%ebx,1),%ebx |
# eax zeroed for zero-extended %al indices; edi is reused as the byte index
# (15 down to 0) -- the input pointer is still available in the arg2 slot.
| 146 xorl %eax,%eax |
| 147 movl $15,%edi |
| 148 jmp L003x86_loop |
| 149 .align 4,0x90 |
| 150 L003x86_loop: |
# First half: shift the accumulator right by 4 (al keeps the 4 bits that fall
# out of ebx) and xor the matching correction word into the top dword.
| 151 movb %bl,%al |
| 152 shrdl $4,%ecx,%ebx |
| 153 andb $15,%al |
| 154 shrdl $4,%edx,%ecx |
| 155 shrdl $4,%ebp,%edx |
| 156 shrl $4,%ebp |
| 157 xorl 16(%esp,%eax,4),%ebp |
# High nibble of byte [edi], left in place (= nibble * 16), selects the entry.
| 158 movb (%esp,%edi,1),%al |
| 159 andb $240,%al |
| 160 xorl 8(%esi,%eax,1),%ebx |
| 161 xorl 12(%esi,%eax,1),%ecx |
| 162 xorl (%esi,%eax,1),%edx |
| 163 xorl 4(%esi,%eax,1),%ebp |
# Step to the next lower byte; leave once byte 0 has been fully consumed.
| 164 decl %edi |
| 165 js L004x86_break |
# Second half: same shift-and-correct step ...
| 166 movb %bl,%al |
| 167 shrdl $4,%ecx,%ebx |
| 168 andb $15,%al |
| 169 shrdl $4,%edx,%ecx |
| 170 shrdl $4,%ebp,%edx |
| 171 shrl $4,%ebp |
| 172 xorl 16(%esp,%eax,4),%ebp |
# ... then the low nibble of the new byte [edi], scaled by 16 via shlb.
| 173 movb (%esp,%edi,1),%al |
| 174 shlb $4,%al |
| 175 xorl 8(%esi,%eax,1),%ebx |
| 176 xorl 12(%esi,%eax,1),%ecx |
| 177 xorl (%esi,%eax,1),%edx |
| 178 xorl 4(%esi,%eax,1),%ebp |
| 179 jmp L003x86_loop |
| 180 .align 4,0x90 |
| 181 L004x86_break: |
# Byte-swap the accumulator so it is in the same layout as the loaded state.
| 182 bswap %ebx |
| 183 bswap %ecx |
| 184 bswap %edx |
| 185 bswap %ebp |
# Advance the saved input pointer by 16 and loop while it is below the end
# pointer (unsigned compare). The movl between cmpl and jb leaves flags intact.
| 186 movl 112(%esp),%edi |
| 187 leal 16(%edi),%edi |
| 188 cmpl 116(%esp),%edi |
| 189 movl %edi,112(%esp) |
| 190 jb L002x86_outer_loop |
# Write the final 16-byte state back to arg0.
| 191 movl 104(%esp),%edi |
| 192 movl %ebx,12(%edi) |
| 193 movl %ecx,8(%edi) |
| 194 movl %edx,4(%edi) |
| 195 movl %ebp,(%edi) |
| 196 addl $84,%esp |
| 197 popl %edi |
| 198 popl %esi |
| 199 popl %ebx |
| 200 popl %ebp |
| 201 ret |
| 202 .align 4 |
# __mmx_gmult_4bit_inner -- fully unrolled MMX helper called by
# _gcm_gmult_4bit_mmx and _gcm_ghash_4bit_mmx (register-based, non-C interface).
# In:  %ebx = byte 15 of the block (only the low 8 bits are used),
#      %esi = base of the table read in 16-byte entries at offset nibble*16,
#      %edi = pointer to the 16-byte block (bytes 14..0 are read from it),
#      %eax = address of Lrem_4bit.
# Out: %ebx, %ecx, %edx, %ebp = byte-swapped result dwords; the callers store
#      them at offsets 12, 8, 4 and 0 respectively.
# Clobbers %edi, %mm0-%mm2 and flags. Does not execute emms; the callers do.
# Working state: %mm1:%mm0 is a 128-bit accumulator (mm1 = upper half). Each
# round shifts it right by 4 (mm2 = mm1 << 60 carries the nibble from mm1 into
# mm0), xors in a table entry, and xors the Lrem_4bit entry selected by the
# 4 bits shifted out of mm0 (captured via movd + andl $15) into mm1.
# The steps of neighbouring rounds are interleaved, so the statement order
# below is significant.
| 203 __mmx_gmult_4bit_inner: |
# ecx is cleared once; afterwards only %cl is written, so ecx stays a
# zero-extended table offset. cl = (low nibble of byte 15) * 16,
# edx = (high nibble of byte 15) * 16.
| 204 xorl %ecx,%ecx |
| 205 movl %ebx,%edx |
| 206 movb %dl,%cl |
| 207 shlb $4,%cl |
| 208 andl $240,%edx |
# Seed the accumulator with the entry for the low nibble of byte 15.
| 209 movq 8(%esi,%ecx,1),%mm0 |
| 210 movq (%esi,%ecx,1),%mm1 |
| 211 movd %mm0,%ebp |
| 212 psrlq $4,%mm0 |
| 213 movq %mm1,%mm2 |
| 214 psrlq $4,%mm1 |
| 215 pxor 8(%esi,%edx,1),%mm0 |
# Fetch byte 14; cl becomes (low nibble * 16) and edx (high nibble * 16)
# for the next two table lookups.
| 216 movb 14(%edi),%cl |
| 217 psllq $60,%mm2 |
| 218 andl $15,%ebp |
| 219 pxor (%esi,%edx,1),%mm1 |
| 220 movl %ecx,%edx |
| 221 movd %mm0,%ebx |
| 222 pxor %mm2,%mm0 |
| 223 shlb $4,%cl |
| 224 psrlq $4,%mm0 |
| 225 movq %mm1,%mm2 |
| 226 psrlq $4,%mm1 |
| 227 pxor 8(%esi,%ecx,1),%mm0 |
| 228 psllq $60,%mm2 |
| 229 andl $240,%edx |
| 230 pxor (%eax,%ebp,8),%mm1 |
| 231 andl $15,%ebx |
| 232 pxor (%esi,%ecx,1),%mm1 |
| 233 movd %mm0,%ebp |
| 234 pxor %mm2,%mm0 |
| 235 psrlq $4,%mm0 |
| 236 movq %mm1,%mm2 |
| 237 psrlq $4,%mm1 |
| 238 pxor 8(%esi,%edx,1),%mm0 |
# Fetch byte 13 (same two-round pattern as above).
| 239 movb 13(%edi),%cl |
| 240 psllq $60,%mm2 |
| 241 pxor (%eax,%ebx,8),%mm1 |
| 242 andl $15,%ebp |
| 243 pxor (%esi,%edx,1),%mm1 |
| 244 movl %ecx,%edx |
| 245 movd %mm0,%ebx |
| 246 pxor %mm2,%mm0 |
| 247 shlb $4,%cl |
| 248 psrlq $4,%mm0 |
| 249 movq %mm1,%mm2 |
| 250 psrlq $4,%mm1 |
| 251 pxor 8(%esi,%ecx,1),%mm0 |
| 252 psllq $60,%mm2 |
| 253 andl $240,%edx |
| 254 pxor (%eax,%ebp,8),%mm1 |
| 255 andl $15,%ebx |
| 256 pxor (%esi,%ecx,1),%mm1 |
| 257 movd %mm0,%ebp |
| 258 pxor %mm2,%mm0 |
| 259 psrlq $4,%mm0 |
| 260 movq %mm1,%mm2 |
| 261 psrlq $4,%mm1 |
| 262 pxor 8(%esi,%edx,1),%mm0 |
# Fetch byte 12.
| 263 movb 12(%edi),%cl |
| 264 psllq $60,%mm2 |
| 265 pxor (%eax,%ebx,8),%mm1 |
| 266 andl $15,%ebp |
| 267 pxor (%esi,%edx,1),%mm1 |
| 268 movl %ecx,%edx |
| 269 movd %mm0,%ebx |
| 270 pxor %mm2,%mm0 |
| 271 shlb $4,%cl |
| 272 psrlq $4,%mm0 |
| 273 movq %mm1,%mm2 |
| 274 psrlq $4,%mm1 |
| 275 pxor 8(%esi,%ecx,1),%mm0 |
| 276 psllq $60,%mm2 |
| 277 andl $240,%edx |
| 278 pxor (%eax,%ebp,8),%mm1 |
| 279 andl $15,%ebx |
| 280 pxor (%esi,%ecx,1),%mm1 |
| 281 movd %mm0,%ebp |
| 282 pxor %mm2,%mm0 |
| 283 psrlq $4,%mm0 |
| 284 movq %mm1,%mm2 |
| 285 psrlq $4,%mm1 |
| 286 pxor 8(%esi,%edx,1),%mm0 |
# Fetch byte 11.
| 287 movb 11(%edi),%cl |
| 288 psllq $60,%mm2 |
| 289 pxor (%eax,%ebx,8),%mm1 |
| 290 andl $15,%ebp |
| 291 pxor (%esi,%edx,1),%mm1 |
| 292 movl %ecx,%edx |
| 293 movd %mm0,%ebx |
| 294 pxor %mm2,%mm0 |
| 295 shlb $4,%cl |
| 296 psrlq $4,%mm0 |
| 297 movq %mm1,%mm2 |
| 298 psrlq $4,%mm1 |
| 299 pxor 8(%esi,%ecx,1),%mm0 |
| 300 psllq $60,%mm2 |
| 301 andl $240,%edx |
| 302 pxor (%eax,%ebp,8),%mm1 |
| 303 andl $15,%ebx |
| 304 pxor (%esi,%ecx,1),%mm1 |
| 305 movd %mm0,%ebp |
| 306 pxor %mm2,%mm0 |
| 307 psrlq $4,%mm0 |
| 308 movq %mm1,%mm2 |
| 309 psrlq $4,%mm1 |
| 310 pxor 8(%esi,%edx,1),%mm0 |
# Fetch byte 10.
| 311 movb 10(%edi),%cl |
| 312 psllq $60,%mm2 |
| 313 pxor (%eax,%ebx,8),%mm1 |
| 314 andl $15,%ebp |
| 315 pxor (%esi,%edx,1),%mm1 |
| 316 movl %ecx,%edx |
| 317 movd %mm0,%ebx |
| 318 pxor %mm2,%mm0 |
| 319 shlb $4,%cl |
| 320 psrlq $4,%mm0 |
| 321 movq %mm1,%mm2 |
| 322 psrlq $4,%mm1 |
| 323 pxor 8(%esi,%ecx,1),%mm0 |
| 324 psllq $60,%mm2 |
| 325 andl $240,%edx |
| 326 pxor (%eax,%ebp,8),%mm1 |
| 327 andl $15,%ebx |
| 328 pxor (%esi,%ecx,1),%mm1 |
| 329 movd %mm0,%ebp |
| 330 pxor %mm2,%mm0 |
| 331 psrlq $4,%mm0 |
| 332 movq %mm1,%mm2 |
| 333 psrlq $4,%mm1 |
| 334 pxor 8(%esi,%edx,1),%mm0 |
# Fetch byte 9.
| 335 movb 9(%edi),%cl |
| 336 psllq $60,%mm2 |
| 337 pxor (%eax,%ebx,8),%mm1 |
| 338 andl $15,%ebp |
| 339 pxor (%esi,%edx,1),%mm1 |
| 340 movl %ecx,%edx |
| 341 movd %mm0,%ebx |
| 342 pxor %mm2,%mm0 |
| 343 shlb $4,%cl |
| 344 psrlq $4,%mm0 |
| 345 movq %mm1,%mm2 |
| 346 psrlq $4,%mm1 |
| 347 pxor 8(%esi,%ecx,1),%mm0 |
| 348 psllq $60,%mm2 |
| 349 andl $240,%edx |
| 350 pxor (%eax,%ebp,8),%mm1 |
| 351 andl $15,%ebx |
| 352 pxor (%esi,%ecx,1),%mm1 |
| 353 movd %mm0,%ebp |
| 354 pxor %mm2,%mm0 |
| 355 psrlq $4,%mm0 |
| 356 movq %mm1,%mm2 |
| 357 psrlq $4,%mm1 |
| 358 pxor 8(%esi,%edx,1),%mm0 |
# Fetch byte 8.
| 359 movb 8(%edi),%cl |
| 360 psllq $60,%mm2 |
| 361 pxor (%eax,%ebx,8),%mm1 |
| 362 andl $15,%ebp |
| 363 pxor (%esi,%edx,1),%mm1 |
| 364 movl %ecx,%edx |
| 365 movd %mm0,%ebx |
| 366 pxor %mm2,%mm0 |
| 367 shlb $4,%cl |
| 368 psrlq $4,%mm0 |
| 369 movq %mm1,%mm2 |
| 370 psrlq $4,%mm1 |
| 371 pxor 8(%esi,%ecx,1),%mm0 |
| 372 psllq $60,%mm2 |
| 373 andl $240,%edx |
| 374 pxor (%eax,%ebp,8),%mm1 |
| 375 andl $15,%ebx |
| 376 pxor (%esi,%ecx,1),%mm1 |
| 377 movd %mm0,%ebp |
| 378 pxor %mm2,%mm0 |
| 379 psrlq $4,%mm0 |
| 380 movq %mm1,%mm2 |
| 381 psrlq $4,%mm1 |
| 382 pxor 8(%esi,%edx,1),%mm0 |
# Fetch byte 7.
| 383 movb 7(%edi),%cl |
| 384 psllq $60,%mm2 |
| 385 pxor (%eax,%ebx,8),%mm1 |
| 386 andl $15,%ebp |
| 387 pxor (%esi,%edx,1),%mm1 |
| 388 movl %ecx,%edx |
| 389 movd %mm0,%ebx |
| 390 pxor %mm2,%mm0 |
| 391 shlb $4,%cl |
| 392 psrlq $4,%mm0 |
| 393 movq %mm1,%mm2 |
| 394 psrlq $4,%mm1 |
| 395 pxor 8(%esi,%ecx,1),%mm0 |
| 396 psllq $60,%mm2 |
| 397 andl $240,%edx |
| 398 pxor (%eax,%ebp,8),%mm1 |
| 399 andl $15,%ebx |
| 400 pxor (%esi,%ecx,1),%mm1 |
| 401 movd %mm0,%ebp |
| 402 pxor %mm2,%mm0 |
| 403 psrlq $4,%mm0 |
| 404 movq %mm1,%mm2 |
| 405 psrlq $4,%mm1 |
| 406 pxor 8(%esi,%edx,1),%mm0 |
# Fetch byte 6.
| 407 movb 6(%edi),%cl |
| 408 psllq $60,%mm2 |
| 409 pxor (%eax,%ebx,8),%mm1 |
| 410 andl $15,%ebp |
| 411 pxor (%esi,%edx,1),%mm1 |
| 412 movl %ecx,%edx |
| 413 movd %mm0,%ebx |
| 414 pxor %mm2,%mm0 |
| 415 shlb $4,%cl |
| 416 psrlq $4,%mm0 |
| 417 movq %mm1,%mm2 |
| 418 psrlq $4,%mm1 |
| 419 pxor 8(%esi,%ecx,1),%mm0 |
| 420 psllq $60,%mm2 |
| 421 andl $240,%edx |
| 422 pxor (%eax,%ebp,8),%mm1 |
| 423 andl $15,%ebx |
| 424 pxor (%esi,%ecx,1),%mm1 |
| 425 movd %mm0,%ebp |
| 426 pxor %mm2,%mm0 |
| 427 psrlq $4,%mm0 |
| 428 movq %mm1,%mm2 |
| 429 psrlq $4,%mm1 |
| 430 pxor 8(%esi,%edx,1),%mm0 |
# Fetch byte 5.
| 431 movb 5(%edi),%cl |
| 432 psllq $60,%mm2 |
| 433 pxor (%eax,%ebx,8),%mm1 |
| 434 andl $15,%ebp |
| 435 pxor (%esi,%edx,1),%mm1 |
| 436 movl %ecx,%edx |
| 437 movd %mm0,%ebx |
| 438 pxor %mm2,%mm0 |
| 439 shlb $4,%cl |
| 440 psrlq $4,%mm0 |
| 441 movq %mm1,%mm2 |
| 442 psrlq $4,%mm1 |
| 443 pxor 8(%esi,%ecx,1),%mm0 |
| 444 psllq $60,%mm2 |
| 445 andl $240,%edx |
| 446 pxor (%eax,%ebp,8),%mm1 |
| 447 andl $15,%ebx |
| 448 pxor (%esi,%ecx,1),%mm1 |
| 449 movd %mm0,%ebp |
| 450 pxor %mm2,%mm0 |
| 451 psrlq $4,%mm0 |
| 452 movq %mm1,%mm2 |
| 453 psrlq $4,%mm1 |
| 454 pxor 8(%esi,%edx,1),%mm0 |
# Fetch byte 4.
| 455 movb 4(%edi),%cl |
| 456 psllq $60,%mm2 |
| 457 pxor (%eax,%ebx,8),%mm1 |
| 458 andl $15,%ebp |
| 459 pxor (%esi,%edx,1),%mm1 |
| 460 movl %ecx,%edx |
| 461 movd %mm0,%ebx |
| 462 pxor %mm2,%mm0 |
| 463 shlb $4,%cl |
| 464 psrlq $4,%mm0 |
| 465 movq %mm1,%mm2 |
| 466 psrlq $4,%mm1 |
| 467 pxor 8(%esi,%ecx,1),%mm0 |
| 468 psllq $60,%mm2 |
| 469 andl $240,%edx |
| 470 pxor (%eax,%ebp,8),%mm1 |
| 471 andl $15,%ebx |
| 472 pxor (%esi,%ecx,1),%mm1 |
| 473 movd %mm0,%ebp |
| 474 pxor %mm2,%mm0 |
| 475 psrlq $4,%mm0 |
| 476 movq %mm1,%mm2 |
| 477 psrlq $4,%mm1 |
| 478 pxor 8(%esi,%edx,1),%mm0 |
# Fetch byte 3.
| 479 movb 3(%edi),%cl |
| 480 psllq $60,%mm2 |
| 481 pxor (%eax,%ebx,8),%mm1 |
| 482 andl $15,%ebp |
| 483 pxor (%esi,%edx,1),%mm1 |
| 484 movl %ecx,%edx |
| 485 movd %mm0,%ebx |
| 486 pxor %mm2,%mm0 |
| 487 shlb $4,%cl |
| 488 psrlq $4,%mm0 |
| 489 movq %mm1,%mm2 |
| 490 psrlq $4,%mm1 |
| 491 pxor 8(%esi,%ecx,1),%mm0 |
| 492 psllq $60,%mm2 |
| 493 andl $240,%edx |
| 494 pxor (%eax,%ebp,8),%mm1 |
| 495 andl $15,%ebx |
| 496 pxor (%esi,%ecx,1),%mm1 |
| 497 movd %mm0,%ebp |
| 498 pxor %mm2,%mm0 |
| 499 psrlq $4,%mm0 |
| 500 movq %mm1,%mm2 |
| 501 psrlq $4,%mm1 |
| 502 pxor 8(%esi,%edx,1),%mm0 |
# Fetch byte 2.
| 503 movb 2(%edi),%cl |
| 504 psllq $60,%mm2 |
| 505 pxor (%eax,%ebx,8),%mm1 |
| 506 andl $15,%ebp |
| 507 pxor (%esi,%edx,1),%mm1 |
| 508 movl %ecx,%edx |
| 509 movd %mm0,%ebx |
| 510 pxor %mm2,%mm0 |
| 511 shlb $4,%cl |
| 512 psrlq $4,%mm0 |
| 513 movq %mm1,%mm2 |
| 514 psrlq $4,%mm1 |
| 515 pxor 8(%esi,%ecx,1),%mm0 |
| 516 psllq $60,%mm2 |
| 517 andl $240,%edx |
| 518 pxor (%eax,%ebp,8),%mm1 |
| 519 andl $15,%ebx |
| 520 pxor (%esi,%ecx,1),%mm1 |
| 521 movd %mm0,%ebp |
| 522 pxor %mm2,%mm0 |
| 523 psrlq $4,%mm0 |
| 524 movq %mm1,%mm2 |
| 525 psrlq $4,%mm1 |
| 526 pxor 8(%esi,%edx,1),%mm0 |
# Fetch byte 1.
| 527 movb 1(%edi),%cl |
| 528 psllq $60,%mm2 |
| 529 pxor (%eax,%ebx,8),%mm1 |
| 530 andl $15,%ebp |
| 531 pxor (%esi,%edx,1),%mm1 |
| 532 movl %ecx,%edx |
| 533 movd %mm0,%ebx |
| 534 pxor %mm2,%mm0 |
| 535 shlb $4,%cl |
| 536 psrlq $4,%mm0 |
| 537 movq %mm1,%mm2 |
| 538 psrlq $4,%mm1 |
| 539 pxor 8(%esi,%ecx,1),%mm0 |
| 540 psllq $60,%mm2 |
| 541 andl $240,%edx |
| 542 pxor (%eax,%ebp,8),%mm1 |
| 543 andl $15,%ebx |
| 544 pxor (%esi,%ecx,1),%mm1 |
| 545 movd %mm0,%ebp |
| 546 pxor %mm2,%mm0 |
| 547 psrlq $4,%mm0 |
| 548 movq %mm1,%mm2 |
| 549 psrlq $4,%mm1 |
| 550 pxor 8(%esi,%edx,1),%mm0 |
# Fetch byte 0 (last byte of the block).
| 551 movb (%edi),%cl |
| 552 psllq $60,%mm2 |
| 553 pxor (%eax,%ebx,8),%mm1 |
| 554 andl $15,%ebp |
| 555 pxor (%esi,%edx,1),%mm1 |
| 556 movl %ecx,%edx |
| 557 movd %mm0,%ebx |
| 558 pxor %mm2,%mm0 |
| 559 shlb $4,%cl |
| 560 psrlq $4,%mm0 |
| 561 movq %mm1,%mm2 |
| 562 psrlq $4,%mm1 |
| 563 pxor 8(%esi,%ecx,1),%mm0 |
| 564 psllq $60,%mm2 |
| 565 andl $240,%edx |
| 566 pxor (%eax,%ebp,8),%mm1 |
| 567 andl $15,%ebx |
| 568 pxor (%esi,%ecx,1),%mm1 |
| 569 movd %mm0,%ebp |
| 570 pxor %mm2,%mm0 |
| 571 psrlq $4,%mm0 |
| 572 movq %mm1,%mm2 |
| 573 psrlq $4,%mm1 |
| 574 pxor 8(%esi,%edx,1),%mm0 |
# All 16 bytes consumed: finish the pending round without fetching more input.
| 575 psllq $60,%mm2 |
| 576 pxor (%eax,%ebx,8),%mm1 |
| 577 andl $15,%ebp |
| 578 pxor (%esi,%edx,1),%mm1 |
# Split mm1:mm0 into four dwords: ebx/ecx = low/high half of mm0,
# edx/ebp = low/high half of mm1. The last Lrem_4bit term is applied in the
# integer domain: its upper dword (offset 4), shifted left by 4, is xor-ed
# into ebp. edi is used as scratch for it, which is why edi is clobbered.
| 579 movd %mm0,%ebx |
| 580 pxor %mm2,%mm0 |
| 581 movl 4(%eax,%ebp,8),%edi |
| 582 psrlq $32,%mm0 |
| 583 movd %mm1,%edx |
| 584 psrlq $32,%mm1 |
| 585 movd %mm0,%ecx |
| 586 movd %mm1,%ebp |
| 587 shll $4,%edi |
# Byte-swap each dword into the layout the callers store to memory.
| 588 bswap %ebx |
| 589 bswap %edx |
| 590 bswap %ecx |
| 591 xorl %edi,%ebp |
| 592 bswap %ebp |
| 593 ret |
| 594 .globl _gcm_gmult_4bit_mmx |
# _gcm_gmult_4bit_mmx -- MMX entry point taking two 32-bit stack arguments:
#   arg0: pointer to a 16-byte block; read, then overwritten with the result.
#   arg1: base of the 16-byte-entry table passed to the helper in %esi.
# Thin wrapper: sets up the register interface of __mmx_gmult_4bit_inner,
# calls it once and stores the four dwords it returns.
# Saves/restores %ebp, %ebx, %esi, %edi; %eax, %ecx, %edx, flags and
# %mm0-%mm2 are clobbered; emms is executed before returning.
# NOTE(review): presumably arg0 = Xi and arg1 = Htable -- confirm against the
# C prototype.
| 595 .align 4 |
| 596 _gcm_gmult_4bit_mmx: |
| 597 L_gcm_gmult_4bit_mmx_begin: |
| 598 pushl %ebp |
| 599 pushl %ebx |
| 600 pushl %esi |
| 601 pushl %edi |
# After four pushes the return address is at 16(%esp): arg0 = 20, arg1 = 24.
| 602 movl 20(%esp),%edi |
| 603 movl 24(%esp),%esi |
# Position-independent address of Lrem_4bit: call/pop yields the address of
# L005pic_point, then lea adds the assemble-time distance to the table.
| 604 call L005pic_point |
| 605 L005pic_point: |
| 606 popl %eax |
| 607 leal Lrem_4bit-L005pic_point(%eax),%eax |
# The helper expects byte 15 of the block, zero-extended, in ebx.
| 608 movzbl 15(%edi),%ebx |
| 609 call __mmx_gmult_4bit_inner |
# The helper clobbers edi, so reload arg0; leave MMX state before returning.
| 610 movl 20(%esp),%edi |
| 611 emms |
| 612 movl %ebx,12(%edi) |
| 613 movl %edx,4(%edi) |
| 614 movl %ecx,8(%edi) |
| 615 movl %ebp,(%edi) |
| 616 popl %edi |
| 617 popl %esi |
| 618 popl %ebx |
| 619 popl %ebp |
| 620 ret |
| 621 .globl _gcm_ghash_4bit_mmx |
# _gcm_ghash_4bit_mmx -- MMX entry point taking four 32-bit stack arguments:
#   arg0: pointer to a 16-byte state block; read on entry, overwritten at exit.
#   arg1: base of the 16-byte-entry table passed to the helper in %esi.
#   arg2: pointer to the input data, consumed 16 bytes per iteration.
#   arg3: byte count; arg2 + arg3 is used as the end pointer.
# For each 16-byte chunk the state is xor-ed with the chunk, spilled to the
# stack and run through __mmx_gmult_4bit_inner.
# The loop tests its condition at the bottom, so one chunk is always
# processed; NOTE(review): callers presumably guarantee a non-zero length that
# is a multiple of 16 -- confirm.
# Saves/restores %ebp, %ebx, %esi, %edi; %eax, %ecx, %edx, flags and
# %mm0-%mm2 are clobbered; emms is executed before returning.
# The arg2 and arg3 stack slots are overwritten (saved pointer / end pointer).
| 622 .align 4 |
| 623 _gcm_ghash_4bit_mmx: |
| 624 L_gcm_ghash_4bit_mmx_begin: |
| 625 pushl %ebp |
| 626 pushl %ebx |
| 627 pushl %esi |
| 628 pushl %edi |
# After four pushes: arg0 = 20(%esp), arg1 = 24, arg2 = 28, arg3 = 32.
| 629 movl 20(%esp),%ebp |
| 630 movl 24(%esp),%esi |
| 631 movl 28(%esp),%edi |
| 632 movl 32(%esp),%ecx |
# Position-independent address of Lrem_4bit in eax (call/pop + lea).
| 633 call L006pic_point |
| 634 L006pic_point: |
| 635 popl %eax |
| 636 leal Lrem_4bit-L006pic_point(%eax),%eax |
# Turn the length into an end pointer and keep it in the arg3 slot.
| 637 addl %edi,%ecx |
| 638 movl %ecx,32(%esp) |
# 20 bytes of locals; 0..15(%esp) hold the block handed to the helper. From
# here on the argument slots are 20 bytes further away: arg0 = 40(%esp),
# arg2 = 48(%esp), end pointer = 52(%esp).
| 639 subl $20,%esp |
# Load the 16-byte state (ebp is loaded last because it is the base pointer).
| 640 movl 12(%ebp),%ebx |
| 641 movl 4(%ebp),%edx |
| 642 movl 8(%ebp),%ecx |
| 643 movl (%ebp),%ebp |
| 644 jmp L007mmx_outer_loop |
| 645 .align 4,0x90 |
| 646 L007mmx_outer_loop: |
# xor the next 16 input bytes into the state.
| 647 xorl 12(%edi),%ebx |
| 648 xorl 4(%edi),%edx |
| 649 xorl 8(%edi),%ecx |
| 650 xorl (%edi),%ebp |
# Save the input pointer (the helper clobbers edi) and spill the block.
| 651 movl %edi,48(%esp) |
| 652 movl %ebx,12(%esp) |
| 653 movl %edx,4(%esp) |
| 654 movl %ecx,8(%esp) |
| 655 movl %ebp,(%esp) |
# Helper interface: edi -> block on the stack, ebx = byte 15 of the block
# (top byte of the dword at offset 12).
| 656 movl %esp,%edi |
| 657 shrl $24,%ebx |
| 658 call __mmx_gmult_4bit_inner |
# Advance the input pointer by 16; loop while below the end pointer (unsigned).
| 659 movl 48(%esp),%edi |
| 660 leal 16(%edi),%edi |
| 661 cmpl 52(%esp),%edi |
| 662 jb L007mmx_outer_loop |
# Done: leave MMX state and write the final state back to arg0.
| 663 movl 40(%esp),%edi |
| 664 emms |
| 665 movl %ebx,12(%edi) |
| 666 movl %edx,4(%edi) |
| 667 movl %ecx,8(%edi) |
| 668 movl %ebp,(%edi) |
| 669 addl $20,%esp |
| 670 popl %edi |
| 671 popl %esi |
| 672 popl %ebx |
| 673 popl %ebp |
| 674 ret |
| 675 .align 6,0x90 |
# Lrem_4bit -- 16 entries of 8 bytes, each emitted as two .long values
# (first dword 0, second dword the constant). __mmx_gmult_4bit_inner reads
# entry i as a 64-bit operand at (%eax,i,8) and its second dword at
# 4(%eax,i,8); the _mmx entry points load the table address into %eax.
# The constants pushed on the stack by the *_x86 routines are these second
# dwords multiplied by 16.
# NOTE(review): where .align takes a power-of-two exponent (as it does in
# Mach-O assemblers) the directive above requests 64-byte alignment -- confirm
# for the target assembler.
| 676 Lrem_4bit: |
| 677 .long 0,0,0,29491200,0,58982400,0,38141952 |
| 678 .long 0,117964800,0,113901568,0,76283904,0,88997888 |
| 679 .long 0,235929600,0,265420800,0,227803136,0,206962688 |
| 680 .long 0,152567808,0,148504576,0,177995776,0,190709760 |
| 681 .align 6,0x90 |
# L008rem_8bit -- 256 values emitted with .value (32 rows of 8).
# No instruction in the visible part of this file references this label.
# NOTE(review): presumably a remainder table for an 8-bit-at-a-time variant
# that is not part of this build -- confirm before removing.
| 682 L008rem_8bit: |
| 683 .value 0,450,900,582,1800,1738,1164,1358 |
| 684 .value 3600,4050,3476,3158,2328,2266,2716,2910 |
| 685 .value 7200,7650,8100,7782,6952,6890,6316,6510 |
| 686 .value 4656,5106,4532,4214,5432,5370,5820,6014 |
| 687 .value 14400,14722,15300,14854,16200,16010,15564,15630 |
| 688 .value 13904,14226,13780,13334,12632,12442,13020,13086 |
| 689 .value 9312,9634,10212,9766,9064,8874,8428,8494 |
| 690 .value 10864,11186,10740,10294,11640,11450,12028,12094 |
| 691 .value 28800,28994,29444,29382,30600,30282,29708,30158 |
| 692 .value 32400,32594,32020,31958,31128,30810,31260,31710 |
| 693 .value 27808,28002,28452,28390,27560,27242,26668,27118 |
| 694 .value 25264,25458,24884,24822,26040,25722,26172,26622 |
| 695 .value 18624,18690,19268,19078,20424,19978,19532,19854 |
| 696 .value 18128,18194,17748,17558,16856,16410,16988,17310 |
| 697 .value 21728,21794,22372,22182,21480,21034,20588,20910 |
| 698 .value 23280,23346,22900,22710,24056,23610,24188,24510 |
| 699 .value 57600,57538,57988,58182,58888,59338,58764,58446 |
| 700 .value 61200,61138,60564,60758,59416,59866,60316,59998 |
| 701 .value 64800,64738,65188,65382,64040,64490,63916,63598 |
| 702 .value 62256,62194,61620,61814,62520,62970,63420,63102 |
| 703 .value 55616,55426,56004,56070,56904,57226,56780,56334 |
| 704 .value 55120,54930,54484,54550,53336,53658,54236,53790 |
| 705 .value 50528,50338,50916,50982,49768,50090,49644,49198 |
| 706 .value 52080,51890,51444,51510,52344,52666,53244,52798 |
| 707 .value 37248,36930,37380,37830,38536,38730,38156,38094 |
| 708 .value 40848,40530,39956,40406,39064,39258,39708,39646 |
| 709 .value 36256,35938,36388,36838,35496,35690,35116,35054 |
| 710 .value 33712,33394,32820,33270,33976,34170,34620,34558 |
| 711 .value 43456,43010,43588,43910,44744,44810,44364,44174 |
| 712 .value 42960,42514,42068,42390,41176,41242,41820,41630 |
| 713 .value 46560,46114,46692,47014,45800,45866,45420,45230 |
| 714 .value 48112,47666,47220,47542,48376,48442,49020,48830 |
# NUL-terminated ASCII identification string:
# "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>"
| 715 .byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67 |
| 716 .byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112 |
| 717 .byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62 |
| 718 .byte 0 |
OLD | NEW |