| OLD | NEW |
| 1 #if defined(__x86_64__) | 1 #if defined(__x86_64__) |
| 2 .text | 2 .text |
| 3 | 3 |
| 4 .extern OPENSSL_ia32cap_P | 4 .extern OPENSSL_ia32cap_P |
| 5 .hidden OPENSSL_ia32cap_P | 5 .hidden OPENSSL_ia32cap_P |
| 6 | 6 |
| 7 .align 64 | 7 .align 64 |
| 8 .Lzero: | 8 .Lzero: |
| 9 .long 0,0,0,0 | 9 .long 0,0,0,0 |
| 10 .Lone: | 10 .Lone: |
| 11 .long 1,0,0,0 | 11 .long 1,0,0,0 |
| 12 .Linc: | 12 .Linc: |
| 13 .long 0,1,2,3 | 13 .long 0,1,2,3 |
| 14 .Lfour: | 14 .Lfour: |
| 15 .long 4,4,4,4 | 15 .long 4,4,4,4 |
| 16 .Lincy: | 16 .Lincy: |
| 17 .long 0,2,4,6,1,3,5,7 | 17 .long 0,2,4,6,1,3,5,7 |
| 18 .Leight: | 18 .Leight: |
| 19 .long 8,8,8,8,8,8,8,8 | 19 .long 8,8,8,8,8,8,8,8 |
| 20 .Lrot16: | 20 .Lrot16: |
| 21 .byte 0x2,0x3,0x0,0x1, 0x6,0x7,0x4,0x5, 0xa,0xb,0x8,0x9, 0xe,0xf,0xc,0xd | 21 .byte 0x2,0x3,0x0,0x1, 0x6,0x7,0x4,0x5, 0xa,0xb,0x8,0x9, 0xe,0xf,0xc,0xd |
| 22 .Lrot24: | 22 .Lrot24: |
| 23 .byte 0x3,0x0,0x1,0x2, 0x7,0x4,0x5,0x6, 0xb,0x8,0x9,0xa, 0xf,0xc,0xd,0xe | 23 .byte 0x3,0x0,0x1,0x2, 0x7,0x4,0x5,0x6, 0xb,0x8,0x9,0xa, 0xf,0xc,0xd,0xe |
| 24 .Lsigma: | 24 .Lsigma: |
| 25 .byte 101,120,112,97,110,100,32,51,50,45,98,121,116,101,32,107,0 | 25 .byte 101,120,112,97,110,100,32,51,50,45,98,121,116,101,32,107,0 |
| | 26 .align 64 |
| | 27 .Lzeroz: |
| | 28 .long 0,0,0,0, 1,0,0,0, 2,0,0,0, 3,0,0,0 |
| | 29 .Lfourz: |
| | 30 .long 4,0,0,0, 4,0,0,0, 4,0,0,0, 4,0,0,0 |
| | 31 .Lincz: |
| | 32 .long 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 |
| | 33 .Lsixteen: |
| | 34 .long 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16 |
| 26 .byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 | 35 .byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 |
| 27 .globl ChaCha20_ctr32 | 36 .globl ChaCha20_ctr32 |
| 28 .hidden ChaCha20_ctr32 | 37 .hidden ChaCha20_ctr32 |
| 29 .type ChaCha20_ctr32,@function | 38 .type ChaCha20_ctr32,@function |
| 30 .align 64 | 39 .align 64 |
| 31 ChaCha20_ctr32: | 40 ChaCha20_ctr32: |
| 32 cmpq $0,%rdx | 41 cmpq $0,%rdx |
| 33 je .Lno_data | 42 je .Lno_data |
| 34 movq OPENSSL_ia32cap_P+4(%rip),%r10 | 43 movq OPENSSL_ia32cap_P+4(%rip),%r10 |
| 35 testl $512,%r10d | 44 testl $512,%r10d |
| (...skipping 273 matching lines...) |
| 309 movdqu (%rcx),%xmm1 | 318 movdqu (%rcx),%xmm1 |
| 310 movdqu 16(%rcx),%xmm2 | 319 movdqu 16(%rcx),%xmm2 |
| 311 movdqu (%r8),%xmm3 | 320 movdqu (%r8),%xmm3 |
| 312 movdqa .Lrot16(%rip),%xmm6 | 321 movdqa .Lrot16(%rip),%xmm6 |
| 313 movdqa .Lrot24(%rip),%xmm7 | 322 movdqa .Lrot24(%rip),%xmm7 |
| 314 | 323 |
| 315 movdqa %xmm0,0(%rsp) | 324 movdqa %xmm0,0(%rsp) |
| 316 movdqa %xmm1,16(%rsp) | 325 movdqa %xmm1,16(%rsp) |
| 317 movdqa %xmm2,32(%rsp) | 326 movdqa %xmm2,32(%rsp) |
| 318 movdqa %xmm3,48(%rsp) | 327 movdqa %xmm3,48(%rsp) |
| 319 » movl» $10,%ebp | 328 » movq» $10,%r8 |
| 320 jmp .Loop_ssse3 | 329 jmp .Loop_ssse3 |
| 321 | 330 |
| 322 .align 32 | 331 .align 32 |
| 323 .Loop_outer_ssse3: | 332 .Loop_outer_ssse3: |
| 324 movdqa .Lone(%rip),%xmm3 | 333 movdqa .Lone(%rip),%xmm3 |
| 325 movdqa 0(%rsp),%xmm0 | 334 movdqa 0(%rsp),%xmm0 |
| 326 movdqa 16(%rsp),%xmm1 | 335 movdqa 16(%rsp),%xmm1 |
| 327 movdqa 32(%rsp),%xmm2 | 336 movdqa 32(%rsp),%xmm2 |
| 328 paddd 48(%rsp),%xmm3 | 337 paddd 48(%rsp),%xmm3 |
| 329 » movl» $10,%ebp | 338 » movq» $10,%r8 |
| 330 movdqa %xmm3,48(%rsp) | 339 movdqa %xmm3,48(%rsp) |
| 331 jmp .Loop_ssse3 | 340 jmp .Loop_ssse3 |
| 332 | 341 |
| 333 .align 32 | 342 .align 32 |
| 334 .Loop_ssse3: | 343 .Loop_ssse3: |
| 335 paddd %xmm1,%xmm0 | 344 paddd %xmm1,%xmm0 |
| 336 pxor %xmm0,%xmm3 | 345 pxor %xmm0,%xmm3 |
| 337 .byte 102,15,56,0,222 | 346 .byte 102,15,56,0,222 |
| 338 paddd %xmm3,%xmm2 | 347 paddd %xmm3,%xmm2 |
| 339 pxor %xmm2,%xmm1 | 348 pxor %xmm2,%xmm1 |
| (...skipping 28 matching lines...) |
| 368 .byte 102,15,56,0,223 | 377 .byte 102,15,56,0,223 |
| 369 paddd %xmm3,%xmm2 | 378 paddd %xmm3,%xmm2 |
| 370 pxor %xmm2,%xmm1 | 379 pxor %xmm2,%xmm1 |
| 371 movdqa %xmm1,%xmm4 | 380 movdqa %xmm1,%xmm4 |
| 372 psrld $25,%xmm1 | 381 psrld $25,%xmm1 |
| 373 pslld $7,%xmm4 | 382 pslld $7,%xmm4 |
| 374 por %xmm4,%xmm1 | 383 por %xmm4,%xmm1 |
| 375 pshufd $78,%xmm2,%xmm2 | 384 pshufd $78,%xmm2,%xmm2 |
| 376 pshufd $147,%xmm1,%xmm1 | 385 pshufd $147,%xmm1,%xmm1 |
| 377 pshufd $57,%xmm3,%xmm3 | 386 pshufd $57,%xmm3,%xmm3 |
| 378 » decl» %ebp | 387 » decq» %r8 |
| 379 jnz .Loop_ssse3 | 388 jnz .Loop_ssse3 |
| 380 paddd 0(%rsp),%xmm0 | 389 paddd 0(%rsp),%xmm0 |
| 381 paddd 16(%rsp),%xmm1 | 390 paddd 16(%rsp),%xmm1 |
| 382 paddd 32(%rsp),%xmm2 | 391 paddd 32(%rsp),%xmm2 |
| 383 paddd 48(%rsp),%xmm3 | 392 paddd 48(%rsp),%xmm3 |
| 384 | 393 |
| 385 cmpq $64,%rdx | 394 cmpq $64,%rdx |
| 386 jb .Ltail_ssse3 | 395 jb .Ltail_ssse3 |
| 387 | 396 |
| 388 movdqu 0(%rsi),%xmm4 | 397 movdqu 0(%rsi),%xmm4 |
| (...skipping 16 matching lines...) |
| 405 jnz .Loop_outer_ssse3 | 414 jnz .Loop_outer_ssse3 |
| 406 | 415 |
| 407 jmp .Ldone_ssse3 | 416 jmp .Ldone_ssse3 |
| 408 | 417 |
| 409 .align 16 | 418 .align 16 |
| 410 .Ltail_ssse3: | 419 .Ltail_ssse3: |
| 411 movdqa %xmm0,0(%rsp) | 420 movdqa %xmm0,0(%rsp) |
| 412 movdqa %xmm1,16(%rsp) | 421 movdqa %xmm1,16(%rsp) |
| 413 movdqa %xmm2,32(%rsp) | 422 movdqa %xmm2,32(%rsp) |
| 414 movdqa %xmm3,48(%rsp) | 423 movdqa %xmm3,48(%rsp) |
| 415 » xorq» %rbx,%rbx | 424 » xorq» %r8,%r8 |
| 416 | 425 |
| 417 .Loop_tail_ssse3: | 426 .Loop_tail_ssse3: |
| 418 » movzbl» (%rsi,%rbx,1),%eax | 427 » movzbl» (%rsi,%r8,1),%eax |
| 419 » movzbl» (%rsp,%rbx,1),%ecx | 428 » movzbl» (%rsp,%r8,1),%ecx |
| 420 » leaq» 1(%rbx),%rbx | 429 » leaq» 1(%r8),%r8 |
| 421 xorl %ecx,%eax | 430 xorl %ecx,%eax |
| 422 » movb» %al,-1(%rdi,%rbx,1) | 431 » movb» %al,-1(%rdi,%r8,1) |
| 423 decq %rdx | 432 decq %rdx |
| 424 jnz .Loop_tail_ssse3 | 433 jnz .Loop_tail_ssse3 |
| 425 | 434 |
| 426 .Ldone_ssse3: | 435 .Ldone_ssse3: |
| 427 » addq» $64+24,%rsp | 436 » addq» $64+24+48,%rsp |
| 428 » popq» %r15 | |
| 429 » popq» %r14 | |
| 430 » popq» %r13 | |
| 431 » popq» %r12 | |
| 432 » popq» %rbp | |
| 433 » popq» %rbx | |
| 434 .byte 0xf3,0xc3 | 437 .byte 0xf3,0xc3 |
| 435 .size ChaCha20_ssse3,.-ChaCha20_ssse3 | 438 .size ChaCha20_ssse3,.-ChaCha20_ssse3 |
| 436 .type ChaCha20_4x,@function | 439 .type ChaCha20_4x,@function |
| 437 .align 32 | 440 .align 32 |
| 438 ChaCha20_4x: | 441 ChaCha20_4x: |
| 439 .LChaCha20_4x: | 442 .LChaCha20_4x: |
| 440 movq %r10,%r11 | 443 movq %r10,%r11 |
| 441 shrq $32,%r10 | 444 shrq $32,%r10 |
| 442 testq $32,%r10 | 445 testq $32,%r10 |
| 443 jnz .LChaCha20_8x | 446 jnz .LChaCha20_8x |
| (...skipping 1132 matching lines...) |
| 1576 movb %al,-1(%rdi,%r10,1) | 1579 movb %al,-1(%rdi,%r10,1) |
| 1577 decq %rdx | 1580 decq %rdx |
| 1578 jnz .Loop_tail8x | 1581 jnz .Loop_tail8x |
| 1579 | 1582 |
| 1580 .Ldone8x: | 1583 .Ldone8x: |
| 1581 vzeroall | 1584 vzeroall |
| 1582 movq 640(%rsp),%rsp | 1585 movq 640(%rsp),%rsp |
| 1583 .byte 0xf3,0xc3 | 1586 .byte 0xf3,0xc3 |
| 1584 .size ChaCha20_8x,.-ChaCha20_8x | 1587 .size ChaCha20_8x,.-ChaCha20_8x |
| 1585 #endif | 1588 #endif |
| OLD | NEW |