OLD | NEW |
(Empty) | |
| 1 #if defined(__i386__) |
| 2 .file "chacha-x86.S" |
| 3 .text |
/* ChaCha20_ctr32: 32-bit x86 (AT&T syntax) ChaCha20 keystream XOR; all arguments are read from the stack. */
/* After the four pushes below the arguments sit at: */
/*   20(%esp) output pointer, 24(%esp) input pointer, 28(%esp) byte length, */
/*   32(%esp) pointer to 8 dwords of state (read at offsets 0..28), */
/*   36(%esp) pointer to 4 dwords of state (read at offsets 0..12; the first is incremented once per 64-byte block). */
/* NOTE(review): argument roles are inferred from how each slot is used in this file; confirm against the C prototype. */
/* Returns at once when the length is 0. %ebp, %ebx, %esi and %edi are saved on entry and restored on exit. */
| 4 .globl ChaCha20_ctr32 |
| 5 .hidden ChaCha20_ctr32 |
| 6 .type ChaCha20_ctr32,@function |
| 7 .align 16 |
| 8 ChaCha20_ctr32: |
| 9 .L_ChaCha20_ctr32_begin: |
| 10 pushl %ebp |
| 11 pushl %ebx |
| 12 pushl %esi |
| 13 pushl %edi |
/* Length (third argument) == 0: nothing to do. */
| 14 xorl %eax,%eax |
| 15 cmpl 28(%esp),%eax |
| 16 je .L000no_data |
/* call/pop leaves the run-time address of .Lpic_point in %eax for position-independent addressing. */
/* %eax is still live at .Lssse3_shortcut, which uses it to locate .Lssse3_data. */
| 17 call .Lpic_point |
| 18 .Lpic_point: |
| 19 popl %eax |
| 20 leal OPENSSL_ia32cap_P-.Lpic_point(%eax),%ebp |
/* Take the SSSE3 path only when bit 24 of capability dword 0 and bit 9 of dword 1 are both set. */
/* NOTE(review): presumably the FXSR and SSSE3 CPUID feature bits; confirm against the OPENSSL_ia32cap_P definition. */
| 21 testl $16777216,(%ebp) |
| 22 jz .L001x86 |
| 23 testl $512,4(%ebp) |
| 24 jz .L001x86 |
| 25 jmp .Lssse3_shortcut |
/* Scalar path. Frame after subl $132: */
/*   0..60(%esp)    16-dword working state / keystream block */
/*   80..108(%esp)  copy of the 8 dwords from the fourth argument */
/*   112..124(%esp) copy of the 4 dwords from the fifth argument (112 = running block counter) */
/*   128(%esp)      spilled round-loop counter */
/*   152/156/160(%esp) are the caller's argument slots (output, input, length), reused as loop state */
| 26 .L001x86: |
| 27 movl 32(%esp),%esi |
| 28 movl 36(%esp),%edi |
| 29 subl $132,%esp |
| 30 movl (%esi),%eax |
| 31 movl 4(%esi),%ebx |
| 32 movl 8(%esi),%ecx |
| 33 movl 12(%esi),%edx |
| 34 movl %eax,80(%esp) |
| 35 movl %ebx,84(%esp) |
| 36 movl %ecx,88(%esp) |
| 37 movl %edx,92(%esp) |
| 38 movl 16(%esi),%eax |
| 39 movl 20(%esi),%ebx |
| 40 movl 24(%esi),%ecx |
| 41 movl 28(%esi),%edx |
| 42 movl %eax,96(%esp) |
| 43 movl %ebx,100(%esp) |
| 44 movl %ecx,104(%esp) |
| 45 movl %edx,108(%esp) |
| 46 movl (%edi),%eax |
| 47 movl 4(%edi),%ebx |
| 48 movl 8(%edi),%ecx |
| 49 movl 12(%edi),%edx |
/* Store counter-1: the block setup at .L002entry adds 1 before every block, including the first. */
| 50 subl $1,%eax |
| 51 movl %eax,112(%esp) |
| 52 movl %ebx,116(%esp) |
| 53 movl %ecx,120(%esp) |
| 54 movl %edx,124(%esp) |
| 55 jmp .L002entry |
| 56 .align 16 |
/* Next 64-byte block: write the advanced input/output pointers and remaining length back to the argument slots. */
| 57 .L003outer_loop: |
| 58 movl %ebx,156(%esp) |
| 59 movl %eax,152(%esp) |
| 60 movl %ecx,160(%esp) |
/* Build the working state: words 0..3 are the constants 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 */
/* (the ASCII bytes of "expand 32-byte k"); the remaining words come from the saved copies at 80..124(%esp). */
/* Some words are kept in registers (%eax, %ebp, %ecx, %esi, %edx) instead of their stack slots. */
| 61 .L002entry: |
| 62 movl $1634760805,%eax |
| 63 movl $857760878,4(%esp) |
| 64 movl $2036477234,8(%esp) |
| 65 movl $1797285236,12(%esp) |
| 66 movl 84(%esp),%ebx |
| 67 movl 88(%esp),%ebp |
| 68 movl 104(%esp),%ecx |
| 69 movl 108(%esp),%esi |
| 70 movl 116(%esp),%edx |
| 71 movl 120(%esp),%edi |
| 72 movl %ebx,20(%esp) |
| 73 movl %ebp,24(%esp) |
| 74 movl %ecx,40(%esp) |
| 75 movl %esi,44(%esp) |
| 76 movl %edx,52(%esp) |
| 77 movl %edi,56(%esp) |
| 78 movl 92(%esp),%ebx |
| 79 movl 124(%esp),%edi |
| 80 movl 112(%esp),%edx |
| 81 movl 80(%esp),%ebp |
| 82 movl 96(%esp),%ecx |
| 83 movl 100(%esp),%esi |
/* Advance the block counter and save it back for the next block. */
| 84 addl $1,%edx |
| 85 movl %ebx,28(%esp) |
| 86 movl %edi,60(%esp) |
| 87 movl %edx,112(%esp) |
| 88 movl $10,%ebx |
| 89 jmp .L004loop |
| 90 .align 16 |
/* Round loop, 10 iterations. Each iteration runs eight add/xor/rotate sequences (rotations by 16, 12, 8, 7). */
/* The counter in %ebx is spilled to 128(%esp) because %ebx is also used as a working register. */
| 91 .L004loop: |
| 92 addl %ebp,%eax |
| 93 movl %ebx,128(%esp) |
| 94 movl %ebp,%ebx |
| 95 xorl %eax,%edx |
| 96 roll $16,%edx |
| 97 addl %edx,%ecx |
| 98 xorl %ecx,%ebx |
| 99 movl 52(%esp),%edi |
| 100 roll $12,%ebx |
| 101 movl 20(%esp),%ebp |
| 102 addl %ebx,%eax |
| 103 xorl %eax,%edx |
| 104 movl %eax,(%esp) |
| 105 roll $8,%edx |
| 106 movl 4(%esp),%eax |
| 107 addl %edx,%ecx |
| 108 movl %edx,48(%esp) |
| 109 xorl %ecx,%ebx |
| 110 addl %ebp,%eax |
| 111 roll $7,%ebx |
| 112 xorl %eax,%edi |
| 113 movl %ecx,32(%esp) |
| 114 roll $16,%edi |
| 115 movl %ebx,16(%esp) |
| 116 addl %edi,%esi |
| 117 movl 40(%esp),%ecx |
| 118 xorl %esi,%ebp |
| 119 movl 56(%esp),%edx |
| 120 roll $12,%ebp |
| 121 movl 24(%esp),%ebx |
| 122 addl %ebp,%eax |
| 123 xorl %eax,%edi |
| 124 movl %eax,4(%esp) |
| 125 roll $8,%edi |
| 126 movl 8(%esp),%eax |
| 127 addl %edi,%esi |
| 128 movl %edi,52(%esp) |
| 129 xorl %esi,%ebp |
| 130 addl %ebx,%eax |
| 131 roll $7,%ebp |
| 132 xorl %eax,%edx |
| 133 movl %esi,36(%esp) |
| 134 roll $16,%edx |
| 135 movl %ebp,20(%esp) |
| 136 addl %edx,%ecx |
| 137 movl 44(%esp),%esi |
| 138 xorl %ecx,%ebx |
| 139 movl 60(%esp),%edi |
| 140 roll $12,%ebx |
| 141 movl 28(%esp),%ebp |
| 142 addl %ebx,%eax |
| 143 xorl %eax,%edx |
| 144 movl %eax,8(%esp) |
| 145 roll $8,%edx |
| 146 movl 12(%esp),%eax |
| 147 addl %edx,%ecx |
| 148 movl %edx,56(%esp) |
| 149 xorl %ecx,%ebx |
| 150 addl %ebp,%eax |
| 151 roll $7,%ebx |
| 152 xorl %eax,%edi |
| 153 roll $16,%edi |
| 154 movl %ebx,24(%esp) |
| 155 addl %edi,%esi |
| 156 xorl %esi,%ebp |
| 157 roll $12,%ebp |
| 158 movl 20(%esp),%ebx |
| 159 addl %ebp,%eax |
| 160 xorl %eax,%edi |
| 161 movl %eax,12(%esp) |
| 162 roll $8,%edi |
| 163 movl (%esp),%eax |
| 164 addl %edi,%esi |
| 165 movl %edi,%edx |
| 166 xorl %esi,%ebp |
| 167 addl %ebx,%eax |
| 168 roll $7,%ebp |
| 169 xorl %eax,%edx |
| 170 roll $16,%edx |
| 171 movl %ebp,28(%esp) |
| 172 addl %edx,%ecx |
| 173 xorl %ecx,%ebx |
| 174 movl 48(%esp),%edi |
| 175 roll $12,%ebx |
| 176 movl 24(%esp),%ebp |
| 177 addl %ebx,%eax |
| 178 xorl %eax,%edx |
| 179 movl %eax,(%esp) |
| 180 roll $8,%edx |
| 181 movl 4(%esp),%eax |
| 182 addl %edx,%ecx |
| 183 movl %edx,60(%esp) |
| 184 xorl %ecx,%ebx |
| 185 addl %ebp,%eax |
| 186 roll $7,%ebx |
| 187 xorl %eax,%edi |
| 188 movl %ecx,40(%esp) |
| 189 roll $16,%edi |
| 190 movl %ebx,20(%esp) |
| 191 addl %edi,%esi |
| 192 movl 32(%esp),%ecx |
| 193 xorl %esi,%ebp |
| 194 movl 52(%esp),%edx |
| 195 roll $12,%ebp |
| 196 movl 28(%esp),%ebx |
| 197 addl %ebp,%eax |
| 198 xorl %eax,%edi |
| 199 movl %eax,4(%esp) |
| 200 roll $8,%edi |
| 201 movl 8(%esp),%eax |
| 202 addl %edi,%esi |
| 203 movl %edi,48(%esp) |
| 204 xorl %esi,%ebp |
| 205 addl %ebx,%eax |
| 206 roll $7,%ebp |
| 207 xorl %eax,%edx |
| 208 movl %esi,44(%esp) |
| 209 roll $16,%edx |
| 210 movl %ebp,24(%esp) |
| 211 addl %edx,%ecx |
| 212 movl 36(%esp),%esi |
| 213 xorl %ecx,%ebx |
| 214 movl 56(%esp),%edi |
| 215 roll $12,%ebx |
| 216 movl 16(%esp),%ebp |
| 217 addl %ebx,%eax |
| 218 xorl %eax,%edx |
| 219 movl %eax,8(%esp) |
| 220 roll $8,%edx |
| 221 movl 12(%esp),%eax |
| 222 addl %edx,%ecx |
| 223 movl %edx,52(%esp) |
| 224 xorl %ecx,%ebx |
| 225 addl %ebp,%eax |
| 226 roll $7,%ebx |
| 227 xorl %eax,%edi |
| 228 roll $16,%edi |
| 229 movl %ebx,28(%esp) |
| 230 addl %edi,%esi |
| 231 xorl %esi,%ebp |
| 232 movl 48(%esp),%edx |
| 233 roll $12,%ebp |
/* Reload the spilled round counter for the decrement at the bottom of the loop. */
| 234 movl 128(%esp),%ebx |
| 235 addl %ebp,%eax |
| 236 xorl %eax,%edi |
| 237 movl %eax,12(%esp) |
| 238 roll $8,%edi |
| 239 movl (%esp),%eax |
| 240 addl %edi,%esi |
| 241 movl %edi,56(%esp) |
| 242 xorl %esi,%ebp |
| 243 roll $7,%ebp |
| 244 decl %ebx |
| 245 jnz .L004loop |
/* Rounds done: add the initial state words back into the working state. */
/* %ebx = remaining length; fewer than 64 bytes left goes to the byte-wise tail. */
| 246 movl 160(%esp),%ebx |
| 247 addl $1634760805,%eax |
| 248 addl 80(%esp),%ebp |
| 249 addl 96(%esp),%ecx |
| 250 addl 100(%esp),%esi |
| 251 cmpl $64,%ebx |
| 252 jb .L005tail |
/* Full block: %ebx = input pointer, %eax = output pointer. */
/* XOR the 16 keystream words with the input and store them to the output, five words at a time. */
| 253 movl 156(%esp),%ebx |
| 254 addl 112(%esp),%edx |
| 255 addl 120(%esp),%edi |
| 256 xorl (%ebx),%eax |
| 257 xorl 16(%ebx),%ebp |
| 258 movl %eax,(%esp) |
| 259 movl 152(%esp),%eax |
| 260 xorl 32(%ebx),%ecx |
| 261 xorl 36(%ebx),%esi |
| 262 xorl 48(%ebx),%edx |
| 263 xorl 56(%ebx),%edi |
| 264 movl %ebp,16(%eax) |
| 265 movl %ecx,32(%eax) |
| 266 movl %esi,36(%eax) |
| 267 movl %edx,48(%eax) |
| 268 movl %edi,56(%eax) |
| 269 movl 4(%esp),%ebp |
| 270 movl 8(%esp),%ecx |
| 271 movl 12(%esp),%esi |
| 272 movl 20(%esp),%edx |
| 273 movl 24(%esp),%edi |
| 274 addl $857760878,%ebp |
| 275 addl $2036477234,%ecx |
| 276 addl $1797285236,%esi |
| 277 addl 84(%esp),%edx |
| 278 addl 88(%esp),%edi |
| 279 xorl 4(%ebx),%ebp |
| 280 xorl 8(%ebx),%ecx |
| 281 xorl 12(%ebx),%esi |
| 282 xorl 20(%ebx),%edx |
| 283 xorl 24(%ebx),%edi |
| 284 movl %ebp,4(%eax) |
| 285 movl %ecx,8(%eax) |
| 286 movl %esi,12(%eax) |
| 287 movl %edx,20(%eax) |
| 288 movl %edi,24(%eax) |
| 289 movl 28(%esp),%ebp |
| 290 movl 40(%esp),%ecx |
| 291 movl 44(%esp),%esi |
| 292 movl 52(%esp),%edx |
| 293 movl 60(%esp),%edi |
| 294 addl 92(%esp),%ebp |
| 295 addl 104(%esp),%ecx |
| 296 addl 108(%esp),%esi |
| 297 addl 116(%esp),%edx |
| 298 addl 124(%esp),%edi |
| 299 xorl 28(%ebx),%ebp |
| 300 xorl 40(%ebx),%ecx |
| 301 xorl 44(%ebx),%esi |
| 302 xorl 52(%ebx),%edx |
| 303 xorl 60(%ebx),%edi |
| 304 leal 64(%ebx),%ebx |
| 305 movl %ebp,28(%eax) |
| 306 movl (%esp),%ebp |
| 307 movl %ecx,40(%eax) |
| 308 movl 160(%esp),%ecx |
| 309 movl %esi,44(%eax) |
| 310 movl %edx,52(%eax) |
| 311 movl %edi,60(%eax) |
| 312 movl %ebp,(%eax) |
/* Advance both pointers by 64 and loop while bytes remain. */
| 313 leal 64(%eax),%eax |
| 314 subl $64,%ecx |
| 315 jnz .L003outer_loop |
| 316 jmp .L006done |
/* Tail (< 64 bytes): finish the add-back, store the whole keystream block at 0..60(%esp), */
/* then XOR it into the data one byte at a time. */
| 317 .L005tail: |
| 318 addl 112(%esp),%edx |
| 319 addl 120(%esp),%edi |
| 320 movl %eax,(%esp) |
| 321 movl %ebp,16(%esp) |
| 322 movl %ecx,32(%esp) |
| 323 movl %esi,36(%esp) |
| 324 movl %edx,48(%esp) |
| 325 movl %edi,56(%esp) |
| 326 movl 4(%esp),%ebp |
| 327 movl 8(%esp),%ecx |
| 328 movl 12(%esp),%esi |
| 329 movl 20(%esp),%edx |
| 330 movl 24(%esp),%edi |
| 331 addl $857760878,%ebp |
| 332 addl $2036477234,%ecx |
| 333 addl $1797285236,%esi |
| 334 addl 84(%esp),%edx |
| 335 addl 88(%esp),%edi |
| 336 movl %ebp,4(%esp) |
| 337 movl %ecx,8(%esp) |
| 338 movl %esi,12(%esp) |
| 339 movl %edx,20(%esp) |
| 340 movl %edi,24(%esp) |
| 341 movl 28(%esp),%ebp |
| 342 movl 40(%esp),%ecx |
| 343 movl 44(%esp),%esi |
| 344 movl 52(%esp),%edx |
| 345 movl 60(%esp),%edi |
| 346 addl 92(%esp),%ebp |
| 347 addl 104(%esp),%ecx |
| 348 addl 108(%esp),%esi |
| 349 addl 116(%esp),%edx |
| 350 addl 124(%esp),%edi |
| 351 movl %ebp,28(%esp) |
| 352 movl 156(%esp),%ebp |
| 353 movl %ecx,40(%esp) |
| 354 movl 152(%esp),%ecx |
| 355 movl %esi,44(%esp) |
| 356 xorl %esi,%esi |
| 357 movl %edx,52(%esp) |
| 358 movl %edi,60(%esp) |
| 359 xorl %eax,%eax |
| 360 xorl %edx,%edx |
/* %ebp = input, %ecx = output, %esi = byte index, %ebx = bytes remaining (still the length loaded before the jb). */
| 361 .L007tail_loop: |
| 362 movb (%esi,%ebp,1),%al |
| 363 movb (%esp,%esi,1),%dl |
| 364 leal 1(%esi),%esi |
| 365 xorb %dl,%al |
| 366 movb %al,-1(%ecx,%esi,1) |
| 367 decl %ebx |
| 368 jnz .L007tail_loop |
/* Release the 132-byte frame and restore the saved registers. */
| 369 .L006done: |
| 370 addl $132,%esp |
| 371 .L000no_data: |
| 372 popl %edi |
| 373 popl %esi |
| 374 popl %ebx |
| 375 popl %ebp |
| 376 ret |
| 377 .size ChaCha20_ctr32,.-.L_ChaCha20_ctr32_begin |
/* ChaCha20_ssse3: SSSE3 variant; reads the same five stack arguments as ChaCha20_ctr32. */
/* ChaCha20_ctr32 jumps to .Lssse3_shortcut after performing the same four pushes. */
/* NOTE(review): the code after .Lssse3_shortcut uses %eax as the address of .Lpic_point, and only */
/* ChaCha20_ctr32 sets it; this symbol's own prologue does not. It also has no zero-length check. */
/* Confirm that nothing calls ChaCha20_ssse3 directly. */
| 378 .globl ChaCha20_ssse3 |
| 379 .hidden ChaCha20_ssse3 |
| 380 .type ChaCha20_ssse3,@function |
| 381 .align 16 |
| 382 ChaCha20_ssse3: |
| 383 .L_ChaCha20_ssse3_begin: |
| 384 pushl %ebp |
| 385 pushl %ebx |
| 386 pushl %esi |
| 387 pushl %edi |
/* %edi = output, %esi = input, %ecx = length, %edx = pointer to 32 bytes of state, %ebx = pointer to 16 bytes of state. */
| 388 .Lssse3_shortcut: |
| 389 movl 20(%esp),%edi |
| 390 movl 24(%esp),%esi |
| 391 movl 28(%esp),%ecx |
| 392 movl 32(%esp),%edx |
| 393 movl 36(%esp),%ebx |
/* Allocate a 64-byte-aligned frame; the caller's %esp is kept at 512(%esp) and restored at .L011done. */
| 394 movl %esp,%ebp |
| 395 subl $524,%esp |
| 396 andl $-64,%esp |
| 397 movl %ebp,512(%esp) |
/* %eax = address of .Lssse3_data (constant table). */
| 398 leal .Lssse3_data-.Lpic_point(%eax),%eax |
| 399 movdqu (%ebx),%xmm3 |
/* Fewer than 256 bytes: use the one-block-at-a-time path. */
| 400 cmpl $256,%ecx |
| 401 jb .L0081x |
/* Four-block path. The two state pointers are saved at 516/520(%esp) for the later 1x remainder. */
/* %ebp = %esp+384 addresses the base state: 16 vectors at -128..112(%ebp), one per state word, */
/* each broadcasting that word across the four dword lanes (one lane per block). */
| 402 movl %edx,516(%esp) |
| 403 movl %ebx,520(%esp) |
| 404 subl $256,%ecx |
| 405 leal 384(%esp),%ebp |
| 406 movdqu (%edx),%xmm7 |
| 407 pshufd $0,%xmm3,%xmm0 |
| 408 pshufd $85,%xmm3,%xmm1 |
| 409 pshufd $170,%xmm3,%xmm2 |
| 410 pshufd $255,%xmm3,%xmm3 |
/* Counter lanes = counter + {0,1,2,3} - {4,4,4,4}; the outer loop adds {4,4,4,4} back before each batch. */
| 411 paddd 48(%eax),%xmm0 |
| 412 pshufd $0,%xmm7,%xmm4 |
| 413 pshufd $85,%xmm7,%xmm5 |
| 414 psubd 64(%eax),%xmm0 |
| 415 pshufd $170,%xmm7,%xmm6 |
| 416 pshufd $255,%xmm7,%xmm7 |
| 417 movdqa %xmm0,64(%ebp) |
| 418 movdqa %xmm1,80(%ebp) |
| 419 movdqa %xmm2,96(%ebp) |
| 420 movdqa %xmm3,112(%ebp) |
| 421 movdqu 16(%edx),%xmm3 |
| 422 movdqa %xmm4,-64(%ebp) |
| 423 movdqa %xmm5,-48(%ebp) |
| 424 movdqa %xmm6,-32(%ebp) |
| 425 movdqa %xmm7,-16(%ebp) |
| 426 movdqa 32(%eax),%xmm7 |
/* %ebx = %esp+128 addresses the working copy of the state (same -128..112 layout). */
| 427 leal 128(%esp),%ebx |
| 428 pshufd $0,%xmm3,%xmm0 |
| 429 pshufd $85,%xmm3,%xmm1 |
| 430 pshufd $170,%xmm3,%xmm2 |
| 431 pshufd $255,%xmm3,%xmm3 |
| 432 pshufd $0,%xmm7,%xmm4 |
| 433 pshufd $85,%xmm7,%xmm5 |
| 434 pshufd $170,%xmm7,%xmm6 |
| 435 pshufd $255,%xmm7,%xmm7 |
| 436 movdqa %xmm0,(%ebp) |
| 437 movdqa %xmm1,16(%ebp) |
| 438 movdqa %xmm2,32(%ebp) |
| 439 movdqa %xmm3,48(%ebp) |
| 440 movdqa %xmm4,-128(%ebp) |
| 441 movdqa %xmm5,-112(%ebp) |
| 442 movdqa %xmm6,-96(%ebp) |
| 443 movdqa %xmm7,-80(%ebp) |
/* Bias both data pointers by +128 so the loads/stores below can use offsets -128, -64, 0 and 64. */
| 444 leal 128(%esi),%esi |
| 445 leal 128(%edi),%edi |
| 446 jmp .L009outer_loop |
| 447 .align 16 |
/* Per 256-byte batch: copy the base state into the working area (some words stay in registers), */
/* add {4,4,4,4} to the counter lanes and write them back to the base state. */
| 448 .L009outer_loop: |
| 449 movdqa -112(%ebp),%xmm1 |
| 450 movdqa -96(%ebp),%xmm2 |
| 451 movdqa -80(%ebp),%xmm3 |
| 452 movdqa -48(%ebp),%xmm5 |
| 453 movdqa -32(%ebp),%xmm6 |
| 454 movdqa -16(%ebp),%xmm7 |
| 455 movdqa %xmm1,-112(%ebx) |
| 456 movdqa %xmm2,-96(%ebx) |
| 457 movdqa %xmm3,-80(%ebx) |
| 458 movdqa %xmm5,-48(%ebx) |
| 459 movdqa %xmm6,-32(%ebx) |
| 460 movdqa %xmm7,-16(%ebx) |
| 461 movdqa 32(%ebp),%xmm2 |
| 462 movdqa 48(%ebp),%xmm3 |
| 463 movdqa 64(%ebp),%xmm4 |
| 464 movdqa 80(%ebp),%xmm5 |
| 465 movdqa 96(%ebp),%xmm6 |
| 466 movdqa 112(%ebp),%xmm7 |
| 467 paddd 64(%eax),%xmm4 |
| 468 movdqa %xmm2,32(%ebx) |
| 469 movdqa %xmm3,48(%ebx) |
| 470 movdqa %xmm4,64(%ebx) |
| 471 movdqa %xmm5,80(%ebx) |
| 472 movdqa %xmm6,96(%ebx) |
| 473 movdqa %xmm7,112(%ebx) |
| 474 movdqa %xmm4,64(%ebp) |
| 475 movdqa -128(%ebp),%xmm0 |
| 476 movdqa %xmm4,%xmm6 |
| 477 movdqa -64(%ebp),%xmm3 |
| 478 movdqa (%ebp),%xmm4 |
| 479 movdqa 16(%ebp),%xmm5 |
| 480 movl $10,%edx |
| 481 nop |
| 482 .align 16 |
/* Round loop, 10 iterations, four blocks in parallel. */
/* pshufb with the masks at (%eax) and 16(%eax) performs the 16-bit and 8-bit rotations; */
/* the pslld/psrld/por triples perform the rotations by 12 and by 7. */
| 483 .L010loop: |
| 484 paddd %xmm3,%xmm0 |
| 485 movdqa %xmm3,%xmm2 |
| 486 pxor %xmm0,%xmm6 |
| 487 pshufb (%eax),%xmm6 |
| 488 paddd %xmm6,%xmm4 |
| 489 pxor %xmm4,%xmm2 |
| 490 movdqa -48(%ebx),%xmm3 |
| 491 movdqa %xmm2,%xmm1 |
| 492 pslld $12,%xmm2 |
| 493 psrld $20,%xmm1 |
| 494 por %xmm1,%xmm2 |
| 495 movdqa -112(%ebx),%xmm1 |
| 496 paddd %xmm2,%xmm0 |
| 497 movdqa 80(%ebx),%xmm7 |
| 498 pxor %xmm0,%xmm6 |
| 499 movdqa %xmm0,-128(%ebx) |
| 500 pshufb 16(%eax),%xmm6 |
| 501 paddd %xmm6,%xmm4 |
| 502 movdqa %xmm6,64(%ebx) |
| 503 pxor %xmm4,%xmm2 |
| 504 paddd %xmm3,%xmm1 |
| 505 movdqa %xmm2,%xmm0 |
| 506 pslld $7,%xmm2 |
| 507 psrld $25,%xmm0 |
| 508 pxor %xmm1,%xmm7 |
| 509 por %xmm0,%xmm2 |
| 510 movdqa %xmm4,(%ebx) |
| 511 pshufb (%eax),%xmm7 |
| 512 movdqa %xmm2,-64(%ebx) |
| 513 paddd %xmm7,%xmm5 |
| 514 movdqa 32(%ebx),%xmm4 |
| 515 pxor %xmm5,%xmm3 |
| 516 movdqa -32(%ebx),%xmm2 |
| 517 movdqa %xmm3,%xmm0 |
| 518 pslld $12,%xmm3 |
| 519 psrld $20,%xmm0 |
| 520 por %xmm0,%xmm3 |
| 521 movdqa -96(%ebx),%xmm0 |
| 522 paddd %xmm3,%xmm1 |
| 523 movdqa 96(%ebx),%xmm6 |
| 524 pxor %xmm1,%xmm7 |
| 525 movdqa %xmm1,-112(%ebx) |
| 526 pshufb 16(%eax),%xmm7 |
| 527 paddd %xmm7,%xmm5 |
| 528 movdqa %xmm7,80(%ebx) |
| 529 pxor %xmm5,%xmm3 |
| 530 paddd %xmm2,%xmm0 |
| 531 movdqa %xmm3,%xmm1 |
| 532 pslld $7,%xmm3 |
| 533 psrld $25,%xmm1 |
| 534 pxor %xmm0,%xmm6 |
| 535 por %xmm1,%xmm3 |
| 536 movdqa %xmm5,16(%ebx) |
| 537 pshufb (%eax),%xmm6 |
| 538 movdqa %xmm3,-48(%ebx) |
| 539 paddd %xmm6,%xmm4 |
| 540 movdqa 48(%ebx),%xmm5 |
| 541 pxor %xmm4,%xmm2 |
| 542 movdqa -16(%ebx),%xmm3 |
| 543 movdqa %xmm2,%xmm1 |
| 544 pslld $12,%xmm2 |
| 545 psrld $20,%xmm1 |
| 546 por %xmm1,%xmm2 |
| 547 movdqa -80(%ebx),%xmm1 |
| 548 paddd %xmm2,%xmm0 |
| 549 movdqa 112(%ebx),%xmm7 |
| 550 pxor %xmm0,%xmm6 |
| 551 movdqa %xmm0,-96(%ebx) |
| 552 pshufb 16(%eax),%xmm6 |
| 553 paddd %xmm6,%xmm4 |
| 554 movdqa %xmm6,96(%ebx) |
| 555 pxor %xmm4,%xmm2 |
| 556 paddd %xmm3,%xmm1 |
| 557 movdqa %xmm2,%xmm0 |
| 558 pslld $7,%xmm2 |
| 559 psrld $25,%xmm0 |
| 560 pxor %xmm1,%xmm7 |
| 561 por %xmm0,%xmm2 |
| 562 pshufb (%eax),%xmm7 |
| 563 movdqa %xmm2,-32(%ebx) |
| 564 paddd %xmm7,%xmm5 |
| 565 pxor %xmm5,%xmm3 |
| 566 movdqa -48(%ebx),%xmm2 |
| 567 movdqa %xmm3,%xmm0 |
| 568 pslld $12,%xmm3 |
| 569 psrld $20,%xmm0 |
| 570 por %xmm0,%xmm3 |
| 571 movdqa -128(%ebx),%xmm0 |
| 572 paddd %xmm3,%xmm1 |
| 573 pxor %xmm1,%xmm7 |
| 574 movdqa %xmm1,-80(%ebx) |
| 575 pshufb 16(%eax),%xmm7 |
| 576 paddd %xmm7,%xmm5 |
| 577 movdqa %xmm7,%xmm6 |
| 578 pxor %xmm5,%xmm3 |
| 579 paddd %xmm2,%xmm0 |
| 580 movdqa %xmm3,%xmm1 |
| 581 pslld $7,%xmm3 |
| 582 psrld $25,%xmm1 |
| 583 pxor %xmm0,%xmm6 |
| 584 por %xmm1,%xmm3 |
| 585 pshufb (%eax),%xmm6 |
| 586 movdqa %xmm3,-16(%ebx) |
| 587 paddd %xmm6,%xmm4 |
| 588 pxor %xmm4,%xmm2 |
| 589 movdqa -32(%ebx),%xmm3 |
| 590 movdqa %xmm2,%xmm1 |
| 591 pslld $12,%xmm2 |
| 592 psrld $20,%xmm1 |
| 593 por %xmm1,%xmm2 |
| 594 movdqa -112(%ebx),%xmm1 |
| 595 paddd %xmm2,%xmm0 |
| 596 movdqa 64(%ebx),%xmm7 |
| 597 pxor %xmm0,%xmm6 |
| 598 movdqa %xmm0,-128(%ebx) |
| 599 pshufb 16(%eax),%xmm6 |
| 600 paddd %xmm6,%xmm4 |
| 601 movdqa %xmm6,112(%ebx) |
| 602 pxor %xmm4,%xmm2 |
| 603 paddd %xmm3,%xmm1 |
| 604 movdqa %xmm2,%xmm0 |
| 605 pslld $7,%xmm2 |
| 606 psrld $25,%xmm0 |
| 607 pxor %xmm1,%xmm7 |
| 608 por %xmm0,%xmm2 |
| 609 movdqa %xmm4,32(%ebx) |
| 610 pshufb (%eax),%xmm7 |
| 611 movdqa %xmm2,-48(%ebx) |
| 612 paddd %xmm7,%xmm5 |
| 613 movdqa (%ebx),%xmm4 |
| 614 pxor %xmm5,%xmm3 |
| 615 movdqa -16(%ebx),%xmm2 |
| 616 movdqa %xmm3,%xmm0 |
| 617 pslld $12,%xmm3 |
| 618 psrld $20,%xmm0 |
| 619 por %xmm0,%xmm3 |
| 620 movdqa -96(%ebx),%xmm0 |
| 621 paddd %xmm3,%xmm1 |
| 622 movdqa 80(%ebx),%xmm6 |
| 623 pxor %xmm1,%xmm7 |
| 624 movdqa %xmm1,-112(%ebx) |
| 625 pshufb 16(%eax),%xmm7 |
| 626 paddd %xmm7,%xmm5 |
| 627 movdqa %xmm7,64(%ebx) |
| 628 pxor %xmm5,%xmm3 |
| 629 paddd %xmm2,%xmm0 |
| 630 movdqa %xmm3,%xmm1 |
| 631 pslld $7,%xmm3 |
| 632 psrld $25,%xmm1 |
| 633 pxor %xmm0,%xmm6 |
| 634 por %xmm1,%xmm3 |
| 635 movdqa %xmm5,48(%ebx) |
| 636 pshufb (%eax),%xmm6 |
| 637 movdqa %xmm3,-32(%ebx) |
| 638 paddd %xmm6,%xmm4 |
| 639 movdqa 16(%ebx),%xmm5 |
| 640 pxor %xmm4,%xmm2 |
| 641 movdqa -64(%ebx),%xmm3 |
| 642 movdqa %xmm2,%xmm1 |
| 643 pslld $12,%xmm2 |
| 644 psrld $20,%xmm1 |
| 645 por %xmm1,%xmm2 |
| 646 movdqa -80(%ebx),%xmm1 |
| 647 paddd %xmm2,%xmm0 |
| 648 movdqa 96(%ebx),%xmm7 |
| 649 pxor %xmm0,%xmm6 |
| 650 movdqa %xmm0,-96(%ebx) |
| 651 pshufb 16(%eax),%xmm6 |
| 652 paddd %xmm6,%xmm4 |
| 653 movdqa %xmm6,80(%ebx) |
| 654 pxor %xmm4,%xmm2 |
| 655 paddd %xmm3,%xmm1 |
| 656 movdqa %xmm2,%xmm0 |
| 657 pslld $7,%xmm2 |
| 658 psrld $25,%xmm0 |
| 659 pxor %xmm1,%xmm7 |
| 660 por %xmm0,%xmm2 |
| 661 pshufb (%eax),%xmm7 |
| 662 movdqa %xmm2,-16(%ebx) |
| 663 paddd %xmm7,%xmm5 |
| 664 pxor %xmm5,%xmm3 |
| 665 movdqa %xmm3,%xmm0 |
| 666 pslld $12,%xmm3 |
| 667 psrld $20,%xmm0 |
| 668 por %xmm0,%xmm3 |
| 669 movdqa -128(%ebx),%xmm0 |
| 670 paddd %xmm3,%xmm1 |
| 671 movdqa 64(%ebx),%xmm6 |
| 672 pxor %xmm1,%xmm7 |
| 673 movdqa %xmm1,-80(%ebx) |
| 674 pshufb 16(%eax),%xmm7 |
| 675 paddd %xmm7,%xmm5 |
| 676 movdqa %xmm7,96(%ebx) |
| 677 pxor %xmm5,%xmm3 |
| 678 movdqa %xmm3,%xmm1 |
| 679 pslld $7,%xmm3 |
| 680 psrld $25,%xmm1 |
| 681 por %xmm1,%xmm3 |
| 682 decl %edx |
| 683 jnz .L010loop |
/* Rounds done: flush the words still held in registers to the working area. */
| 684 movdqa %xmm3,-64(%ebx) |
| 685 movdqa %xmm4,(%ebx) |
| 686 movdqa %xmm5,16(%ebx) |
| 687 movdqa %xmm6,64(%ebx) |
| 688 movdqa %xmm7,96(%ebx) |
/* Output, four state words at a time: add the base state, interleave the four vectors with punpck*, */
/* XOR with input at offsets -128/-64/0/64 (one per block) and store; both pointers then advance by 16. */
| 689 movdqa -112(%ebx),%xmm1 |
| 690 movdqa -96(%ebx),%xmm2 |
| 691 movdqa -80(%ebx),%xmm3 |
| 692 paddd -128(%ebp),%xmm0 |
| 693 paddd -112(%ebp),%xmm1 |
| 694 paddd -96(%ebp),%xmm2 |
| 695 paddd -80(%ebp),%xmm3 |
| 696 movdqa %xmm0,%xmm6 |
| 697 punpckldq %xmm1,%xmm0 |
| 698 movdqa %xmm2,%xmm7 |
| 699 punpckldq %xmm3,%xmm2 |
| 700 punpckhdq %xmm1,%xmm6 |
| 701 punpckhdq %xmm3,%xmm7 |
| 702 movdqa %xmm0,%xmm1 |
| 703 punpcklqdq %xmm2,%xmm0 |
| 704 movdqa %xmm6,%xmm3 |
| 705 punpcklqdq %xmm7,%xmm6 |
| 706 punpckhqdq %xmm2,%xmm1 |
| 707 punpckhqdq %xmm7,%xmm3 |
| 708 movdqu -128(%esi),%xmm4 |
| 709 movdqu -64(%esi),%xmm5 |
| 710 movdqu (%esi),%xmm2 |
| 711 movdqu 64(%esi),%xmm7 |
| 712 leal 16(%esi),%esi |
| 713 pxor %xmm0,%xmm4 |
| 714 movdqa -64(%ebx),%xmm0 |
| 715 pxor %xmm1,%xmm5 |
| 716 movdqa -48(%ebx),%xmm1 |
| 717 pxor %xmm2,%xmm6 |
| 718 movdqa -32(%ebx),%xmm2 |
| 719 pxor %xmm3,%xmm7 |
| 720 movdqa -16(%ebx),%xmm3 |
| 721 movdqu %xmm4,-128(%edi) |
| 722 movdqu %xmm5,-64(%edi) |
| 723 movdqu %xmm6,(%edi) |
| 724 movdqu %xmm7,64(%edi) |
| 725 leal 16(%edi),%edi |
| 726 paddd -64(%ebp),%xmm0 |
| 727 paddd -48(%ebp),%xmm1 |
| 728 paddd -32(%ebp),%xmm2 |
| 729 paddd -16(%ebp),%xmm3 |
| 730 movdqa %xmm0,%xmm6 |
| 731 punpckldq %xmm1,%xmm0 |
| 732 movdqa %xmm2,%xmm7 |
| 733 punpckldq %xmm3,%xmm2 |
| 734 punpckhdq %xmm1,%xmm6 |
| 735 punpckhdq %xmm3,%xmm7 |
| 736 movdqa %xmm0,%xmm1 |
| 737 punpcklqdq %xmm2,%xmm0 |
| 738 movdqa %xmm6,%xmm3 |
| 739 punpcklqdq %xmm7,%xmm6 |
| 740 punpckhqdq %xmm2,%xmm1 |
| 741 punpckhqdq %xmm7,%xmm3 |
| 742 movdqu -128(%esi),%xmm4 |
| 743 movdqu -64(%esi),%xmm5 |
| 744 movdqu (%esi),%xmm2 |
| 745 movdqu 64(%esi),%xmm7 |
| 746 leal 16(%esi),%esi |
| 747 pxor %xmm0,%xmm4 |
| 748 movdqa (%ebx),%xmm0 |
| 749 pxor %xmm1,%xmm5 |
| 750 movdqa 16(%ebx),%xmm1 |
| 751 pxor %xmm2,%xmm6 |
| 752 movdqa 32(%ebx),%xmm2 |
| 753 pxor %xmm3,%xmm7 |
| 754 movdqa 48(%ebx),%xmm3 |
| 755 movdqu %xmm4,-128(%edi) |
| 756 movdqu %xmm5,-64(%edi) |
| 757 movdqu %xmm6,(%edi) |
| 758 movdqu %xmm7,64(%edi) |
| 759 leal 16(%edi),%edi |
| 760 paddd (%ebp),%xmm0 |
| 761 paddd 16(%ebp),%xmm1 |
| 762 paddd 32(%ebp),%xmm2 |
| 763 paddd 48(%ebp),%xmm3 |
| 764 movdqa %xmm0,%xmm6 |
| 765 punpckldq %xmm1,%xmm0 |
| 766 movdqa %xmm2,%xmm7 |
| 767 punpckldq %xmm3,%xmm2 |
| 768 punpckhdq %xmm1,%xmm6 |
| 769 punpckhdq %xmm3,%xmm7 |
| 770 movdqa %xmm0,%xmm1 |
| 771 punpcklqdq %xmm2,%xmm0 |
| 772 movdqa %xmm6,%xmm3 |
| 773 punpcklqdq %xmm7,%xmm6 |
| 774 punpckhqdq %xmm2,%xmm1 |
| 775 punpckhqdq %xmm7,%xmm3 |
| 776 movdqu -128(%esi),%xmm4 |
| 777 movdqu -64(%esi),%xmm5 |
| 778 movdqu (%esi),%xmm2 |
| 779 movdqu 64(%esi),%xmm7 |
| 780 leal 16(%esi),%esi |
| 781 pxor %xmm0,%xmm4 |
| 782 movdqa 64(%ebx),%xmm0 |
| 783 pxor %xmm1,%xmm5 |
| 784 movdqa 80(%ebx),%xmm1 |
| 785 pxor %xmm2,%xmm6 |
| 786 movdqa 96(%ebx),%xmm2 |
| 787 pxor %xmm3,%xmm7 |
| 788 movdqa 112(%ebx),%xmm3 |
| 789 movdqu %xmm4,-128(%edi) |
| 790 movdqu %xmm5,-64(%edi) |
| 791 movdqu %xmm6,(%edi) |
| 792 movdqu %xmm7,64(%edi) |
| 793 leal 16(%edi),%edi |
| 794 paddd 64(%ebp),%xmm0 |
| 795 paddd 80(%ebp),%xmm1 |
| 796 paddd 96(%ebp),%xmm2 |
| 797 paddd 112(%ebp),%xmm3 |
| 798 movdqa %xmm0,%xmm6 |
| 799 punpckldq %xmm1,%xmm0 |
| 800 movdqa %xmm2,%xmm7 |
| 801 punpckldq %xmm3,%xmm2 |
| 802 punpckhdq %xmm1,%xmm6 |
| 803 punpckhdq %xmm3,%xmm7 |
| 804 movdqa %xmm0,%xmm1 |
| 805 punpcklqdq %xmm2,%xmm0 |
| 806 movdqa %xmm6,%xmm3 |
| 807 punpcklqdq %xmm7,%xmm6 |
| 808 punpckhqdq %xmm2,%xmm1 |
| 809 punpckhqdq %xmm7,%xmm3 |
| 810 movdqu -128(%esi),%xmm4 |
| 811 movdqu -64(%esi),%xmm5 |
| 812 movdqu (%esi),%xmm2 |
| 813 movdqu 64(%esi),%xmm7 |
/* Final group: advance by 208, so the pointers move 3*16 + 208 = 256 bytes per batch. */
| 814 leal 208(%esi),%esi |
| 815 pxor %xmm0,%xmm4 |
| 816 pxor %xmm1,%xmm5 |
| 817 pxor %xmm2,%xmm6 |
| 818 pxor %xmm3,%xmm7 |
| 819 movdqu %xmm4,-128(%edi) |
| 820 movdqu %xmm5,-64(%edi) |
| 821 movdqu %xmm6,(%edi) |
| 822 movdqu %xmm7,64(%edi) |
| 823 leal 208(%edi),%edi |
/* %ecx was pre-decremented by 256 on entry; keep looping while the subtraction does not borrow. */
| 824 subl $256,%ecx |
| 825 jnc .L009outer_loop |
/* Restore the true remainder (0..255). If any bytes are left, fall into the 1x path: reload the two */
/* state pointers, undo the +128 pointer bias, and build xmm3 from the original 16-byte block with */
/* dword 0 replaced by (lane-0 counter of the last batch + 4). */
| 826 addl $256,%ecx |
| 827 jz .L011done |
| 828 movl 520(%esp),%ebx |
| 829 leal -128(%esi),%esi |
| 830 movl 516(%esp),%edx |
| 831 leal -128(%edi),%edi |
| 832 movd 64(%ebp),%xmm2 |
| 833 movdqu (%ebx),%xmm3 |
| 834 paddd 96(%eax),%xmm2 |
| 835 pand 112(%eax),%xmm3 |
| 836 por %xmm2,%xmm3 |
/* One-block path: the state is four vectors. xmm0 = constants from the table, xmm1/xmm2 = the 32 bytes */
/* at (%edx), xmm3 = counter block; xmm6/xmm7 hold the two pshufb masks. */
/* The initial state is kept at 0..63(%esp) for the add-back. */
| 837 .L0081x: |
| 838 movdqa 32(%eax),%xmm0 |
| 839 movdqu (%edx),%xmm1 |
| 840 movdqu 16(%edx),%xmm2 |
| 841 movdqa (%eax),%xmm6 |
| 842 movdqa 16(%eax),%xmm7 |
/* NOTE(review): this store to 48(%esp) is overwritten by the movdqa of %xmm3 to 48(%esp) four instructions later. */
| 843 movl %ebp,48(%esp) |
| 844 movdqa %xmm0,(%esp) |
| 845 movdqa %xmm1,16(%esp) |
| 846 movdqa %xmm2,32(%esp) |
| 847 movdqa %xmm3,48(%esp) |
| 848 movl $10,%edx |
| 849 jmp .L012loop1x |
| 850 .align 16 |
/* Next 64-byte block: reload the saved state and add {1,0,0,0} (table offset 80) to the counter vector. */
| 851 .L013outer1x: |
| 852 movdqa 80(%eax),%xmm3 |
| 853 movdqa (%esp),%xmm0 |
| 854 movdqa 16(%esp),%xmm1 |
| 855 movdqa 32(%esp),%xmm2 |
| 856 paddd 48(%esp),%xmm3 |
| 857 movl $10,%edx |
| 858 movdqa %xmm3,48(%esp) |
| 859 jmp .L012loop1x |
| 860 .align 16 |
/* Round loop, 10 iterations. The raw bytes 102,15,56,0,222 and 102,15,56,0,223 encode */
/* pshufb %xmm6,%xmm3 and pshufb %xmm7,%xmm3. The pshufd instructions permute the dword lanes of */
/* xmm1/xmm2/xmm3 between the two halves of each iteration and back again at the end. */
| 861 .L012loop1x: |
| 862 paddd %xmm1,%xmm0 |
| 863 pxor %xmm0,%xmm3 |
| 864 .byte 102,15,56,0,222 |
| 865 paddd %xmm3,%xmm2 |
| 866 pxor %xmm2,%xmm1 |
| 867 movdqa %xmm1,%xmm4 |
| 868 psrld $20,%xmm1 |
| 869 pslld $12,%xmm4 |
| 870 por %xmm4,%xmm1 |
| 871 paddd %xmm1,%xmm0 |
| 872 pxor %xmm0,%xmm3 |
| 873 .byte 102,15,56,0,223 |
| 874 paddd %xmm3,%xmm2 |
| 875 pxor %xmm2,%xmm1 |
| 876 movdqa %xmm1,%xmm4 |
| 877 psrld $25,%xmm1 |
| 878 pslld $7,%xmm4 |
| 879 por %xmm4,%xmm1 |
| 880 pshufd $78,%xmm2,%xmm2 |
| 881 pshufd $57,%xmm1,%xmm1 |
| 882 pshufd $147,%xmm3,%xmm3 |
| 883 nop |
| 884 paddd %xmm1,%xmm0 |
| 885 pxor %xmm0,%xmm3 |
| 886 .byte 102,15,56,0,222 |
| 887 paddd %xmm3,%xmm2 |
| 888 pxor %xmm2,%xmm1 |
| 889 movdqa %xmm1,%xmm4 |
| 890 psrld $20,%xmm1 |
| 891 pslld $12,%xmm4 |
| 892 por %xmm4,%xmm1 |
| 893 paddd %xmm1,%xmm0 |
| 894 pxor %xmm0,%xmm3 |
| 895 .byte 102,15,56,0,223 |
| 896 paddd %xmm3,%xmm2 |
| 897 pxor %xmm2,%xmm1 |
| 898 movdqa %xmm1,%xmm4 |
| 899 psrld $25,%xmm1 |
| 900 pslld $7,%xmm4 |
| 901 por %xmm4,%xmm1 |
| 902 pshufd $78,%xmm2,%xmm2 |
| 903 pshufd $147,%xmm1,%xmm1 |
| 904 pshufd $57,%xmm3,%xmm3 |
| 905 decl %edx |
| 906 jnz .L012loop1x |
/* Add the initial state back; fewer than 64 bytes remaining goes to the byte-wise tail. */
| 907 paddd (%esp),%xmm0 |
| 908 paddd 16(%esp),%xmm1 |
| 909 paddd 32(%esp),%xmm2 |
| 910 paddd 48(%esp),%xmm3 |
| 911 cmpl $64,%ecx |
| 912 jb .L014tail |
/* Full block: XOR 64 input bytes with the keystream, store, advance both pointers and the length. */
| 913 movdqu (%esi),%xmm4 |
| 914 movdqu 16(%esi),%xmm5 |
| 915 pxor %xmm4,%xmm0 |
| 916 movdqu 32(%esi),%xmm4 |
| 917 pxor %xmm5,%xmm1 |
| 918 movdqu 48(%esi),%xmm5 |
| 919 pxor %xmm4,%xmm2 |
| 920 pxor %xmm5,%xmm3 |
| 921 leal 64(%esi),%esi |
| 922 movdqu %xmm0,(%edi) |
| 923 movdqu %xmm1,16(%edi) |
| 924 movdqu %xmm2,32(%edi) |
| 925 movdqu %xmm3,48(%edi) |
| 926 leal 64(%edi),%edi |
| 927 subl $64,%ecx |
| 928 jnz .L013outer1x |
| 929 jmp .L011done |
/* Tail: spill the keystream block to 0..63(%esp) and XOR the last %ecx bytes one at a time (%ebp = byte index). */
| 930 .L014tail: |
| 931 movdqa %xmm0,(%esp) |
| 932 movdqa %xmm1,16(%esp) |
| 933 movdqa %xmm2,32(%esp) |
| 934 movdqa %xmm3,48(%esp) |
| 935 xorl %eax,%eax |
| 936 xorl %edx,%edx |
| 937 xorl %ebp,%ebp |
| 938 .L015tail_loop: |
| 939 movb (%esp,%ebp,1),%al |
| 940 movb (%esi,%ebp,1),%dl |
| 941 leal 1(%ebp),%ebp |
| 942 xorb %dl,%al |
| 943 movb %al,-1(%edi,%ebp,1) |
| 944 decl %ecx |
| 945 jnz .L015tail_loop |
/* Restore the caller's %esp saved at 512(%esp), then the four pushed registers. */
| 946 .L011done: |
| 947 movl 512(%esp),%esp |
| 948 popl %edi |
| 949 popl %esi |
| 950 popl %ebx |
| 951 popl %ebp |
| 952 ret |
| 953 .size ChaCha20_ssse3,.-.L_ChaCha20_ssse3_begin |
/* Constant table for the SSSE3 code, addressed relative to .Lpic_point through %eax. */
/* Offsets as used by the code above: */
/*    +0  pshufb mask used for the per-dword 16-bit rotation */
/*   +16  pshufb mask used for the per-dword 8-bit rotation */
/*   +32  the four state constants (same values the scalar path loads as immediates) */
/*   +48  {0,1,2,3}    per-lane counter offsets for the four-block path */
/*   +64  {4,4,4,4}    counter step per four-block batch */
/*   +80  {1,0,0,0}    counter step for the one-block path */
/*   +96  {4,0,0,0}    counter adjustment when switching from the four-block to the one-block path */
/*  +112  {0,-1,-1,-1} mask that clears dword 0 of the counter block */
| 954 .align 64 |
| 955 .Lssse3_data: |
| 956 .byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13 |
| 957 .byte 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14 |
| 958 .long 1634760805,857760878,2036477234,1797285236 |
| 959 .long 0,1,2,3 |
| 960 .long 4,4,4,4 |
| 961 .long 1,0,0,0 |
| 962 .long 4,0,0,0 |
| 963 .long 0,-1,-1,-1 |
| 964 .align 64 |
/* NUL-terminated ASCII ident string: "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>". */
| 965 .byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54 |
| 966 .byte 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32 |
| 967 .byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111 |
| 968 .byte 114,103,62,0 |
| 969 #endif |
OLD | NEW |