OLD | NEW |
1 default rel | 1 default rel |
2 %define XMMWORD | 2 %define XMMWORD |
3 %define YMMWORD | 3 %define YMMWORD |
4 %define ZMMWORD | 4 %define ZMMWORD |
5 section .text code align=64 | 5 section .text code align=64 |
6 | 6 |
7 | 7 |
8 EXTERN OPENSSL_ia32cap_P | 8 EXTERN OPENSSL_ia32cap_P |
9 | 9 |
10 ALIGN 64 | 10 ALIGN 64 |
11 $L$zero: | 11 $L$zero: |
12 DD 0,0,0,0 | 12 DD 0,0,0,0 |
13 $L$one: | 13 $L$one: |
14 DD 1,0,0,0 | 14 DD 1,0,0,0 |
15 $L$inc: | 15 $L$inc: |
16 DD 0,1,2,3 | 16 DD 0,1,2,3 |
17 $L$four: | 17 $L$four: |
18 DD 4,4,4,4 | 18 DD 4,4,4,4 |
19 $L$incy: | 19 $L$incy: |
20 DD 0,2,4,6,1,3,5,7 | 20 DD 0,2,4,6,1,3,5,7 |
21 $L$eight: | 21 $L$eight: |
22 DD 8,8,8,8,8,8,8,8 | 22 DD 8,8,8,8,8,8,8,8 |
23 $L$rot16: | 23 $L$rot16: |
24 DB 0x2,0x3,0x0,0x1,0x6,0x7,0x4,0x5,0xa,0xb,0x8,0x9,0xe,0xf,0xc,0xd | 24 DB 0x2,0x3,0x0,0x1,0x6,0x7,0x4,0x5,0xa,0xb,0x8,0x9,0xe,0xf,0xc,0xd |
25 $L$rot24: | 25 $L$rot24: |
26 DB 0x3,0x0,0x1,0x2,0x7,0x4,0x5,0x6,0xb,0x8,0x9,0xa,0xf,0xc,0xd,0xe | 26 DB 0x3,0x0,0x1,0x2,0x7,0x4,0x5,0x6,0xb,0x8,0x9,0xa,0xf,0xc,0xd,0xe |
27 $L$sigma: | 27 $L$sigma: |
28 DB 101,120,112,97,110,100,32,51,50,45,98,121,116,101,32,107 | 28 DB 101,120,112,97,110,100,32,51,50,45,98,121,116,101,32,107 |
29 DB 0 | 29 DB 0 |
| 30 ALIGN 64 |
| 31 $L$zeroz: |
| 32 DD 0,0,0,0,1,0,0,0,2,0,0,0,3,0,0,0 |
| 33 $L$fourz: |
| 34 DD 4,0,0,0,4,0,0,0,4,0,0,0,4,0,0,0 |
| 35 $L$incz: |
| 36 DD 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 |
| 37 $L$sixteen: |
| 38 DD 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16 |
30 DB 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54 | 39 DB 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54 |
31 DB 95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32 | 40 DB 95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32 |
32 DB 98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115 | 41 DB 98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115 |
33 DB 108,46,111,114,103,62,0 | 42 DB 108,46,111,114,103,62,0 |
34 global ChaCha20_ctr32 | 43 global ChaCha20_ctr32 |
35 | 44 |
36 ALIGN 64 | 45 ALIGN 64 |
37 ChaCha20_ctr32: | 46 ChaCha20_ctr32: |
38 mov QWORD[8+rsp],rdi ;WIN64 prologue | 47 mov QWORD[8+rsp],rdi ;WIN64 prologue |
39 mov QWORD[16+rsp],rsi | 48 mov QWORD[16+rsp],rsi |
(...skipping 301 matching lines...) |
341 movdqu xmm1,XMMWORD[rcx] | 350 movdqu xmm1,XMMWORD[rcx] |
342 movdqu xmm2,XMMWORD[16+rcx] | 351 movdqu xmm2,XMMWORD[16+rcx] |
343 movdqu xmm3,XMMWORD[r8] | 352 movdqu xmm3,XMMWORD[r8] |
344 movdqa xmm6,XMMWORD[$L$rot16] | 353 movdqa xmm6,XMMWORD[$L$rot16] |
345 movdqa xmm7,XMMWORD[$L$rot24] | 354 movdqa xmm7,XMMWORD[$L$rot24] |
346 | 355 |
347 movdqa XMMWORD[rsp],xmm0 | 356 movdqa XMMWORD[rsp],xmm0 |
348 movdqa XMMWORD[16+rsp],xmm1 | 357 movdqa XMMWORD[16+rsp],xmm1 |
349 movdqa XMMWORD[32+rsp],xmm2 | 358 movdqa XMMWORD[32+rsp],xmm2 |
350 movdqa XMMWORD[48+rsp],xmm3 | 359 movdqa XMMWORD[48+rsp],xmm3 |
351 mov ebp,10 | 360 mov r8,10 |
352 jmp NEAR $L$oop_ssse3 | 361 jmp NEAR $L$oop_ssse3 |
353 | 362 |
354 ALIGN 32 | 363 ALIGN 32 |
355 $L$oop_outer_ssse3: | 364 $L$oop_outer_ssse3: |
356 movdqa xmm3,XMMWORD[$L$one] | 365 movdqa xmm3,XMMWORD[$L$one] |
357 movdqa xmm0,XMMWORD[rsp] | 366 movdqa xmm0,XMMWORD[rsp] |
358 movdqa xmm1,XMMWORD[16+rsp] | 367 movdqa xmm1,XMMWORD[16+rsp] |
359 movdqa xmm2,XMMWORD[32+rsp] | 368 movdqa xmm2,XMMWORD[32+rsp] |
360 paddd xmm3,XMMWORD[48+rsp] | 369 paddd xmm3,XMMWORD[48+rsp] |
361 mov ebp,10 | 370 mov r8,10 |
362 movdqa XMMWORD[48+rsp],xmm3 | 371 movdqa XMMWORD[48+rsp],xmm3 |
363 jmp NEAR $L$oop_ssse3 | 372 jmp NEAR $L$oop_ssse3 |
364 | 373 |
365 ALIGN 32 | 374 ALIGN 32 |
366 $L$oop_ssse3: | 375 $L$oop_ssse3: |
367 paddd xmm0,xmm1 | 376 paddd xmm0,xmm1 |
368 pxor xmm3,xmm0 | 377 pxor xmm3,xmm0 |
369 DB 102,15,56,0,222 | 378 DB 102,15,56,0,222 |
370 paddd xmm2,xmm3 | 379 paddd xmm2,xmm3 |
371 pxor xmm1,xmm2 | 380 pxor xmm1,xmm2 |
(...skipping 28 matching lines...) |
400 DB 102,15,56,0,223 | 409 DB 102,15,56,0,223 |
401 paddd xmm2,xmm3 | 410 paddd xmm2,xmm3 |
402 pxor xmm1,xmm2 | 411 pxor xmm1,xmm2 |
403 movdqa xmm4,xmm1 | 412 movdqa xmm4,xmm1 |
404 psrld xmm1,25 | 413 psrld xmm1,25 |
405 pslld xmm4,7 | 414 pslld xmm4,7 |
406 por xmm1,xmm4 | 415 por xmm1,xmm4 |
407 pshufd xmm2,xmm2,78 | 416 pshufd xmm2,xmm2,78 |
408 pshufd xmm1,xmm1,147 | 417 pshufd xmm1,xmm1,147 |
409 pshufd xmm3,xmm3,57 | 418 pshufd xmm3,xmm3,57 |
410 dec ebp | 419 dec r8 |
411 jnz NEAR $L$oop_ssse3 | 420 jnz NEAR $L$oop_ssse3 |
412 paddd xmm0,XMMWORD[rsp] | 421 paddd xmm0,XMMWORD[rsp] |
413 paddd xmm1,XMMWORD[16+rsp] | 422 paddd xmm1,XMMWORD[16+rsp] |
414 paddd xmm2,XMMWORD[32+rsp] | 423 paddd xmm2,XMMWORD[32+rsp] |
415 paddd xmm3,XMMWORD[48+rsp] | 424 paddd xmm3,XMMWORD[48+rsp] |
416 | 425 |
417 cmp rdx,64 | 426 cmp rdx,64 |
418 jb NEAR $L$tail_ssse3 | 427 jb NEAR $L$tail_ssse3 |
419 | 428 |
420 movdqu xmm4,XMMWORD[rsi] | 429 movdqu xmm4,XMMWORD[rsi] |
(...skipping 16 matching lines...) |
437 jnz NEAR $L$oop_outer_ssse3 | 446 jnz NEAR $L$oop_outer_ssse3 |
438 | 447 |
439 jmp NEAR $L$done_ssse3 | 448 jmp NEAR $L$done_ssse3 |
440 | 449 |
441 ALIGN 16 | 450 ALIGN 16 |
442 $L$tail_ssse3: | 451 $L$tail_ssse3: |
443 movdqa XMMWORD[rsp],xmm0 | 452 movdqa XMMWORD[rsp],xmm0 |
444 movdqa XMMWORD[16+rsp],xmm1 | 453 movdqa XMMWORD[16+rsp],xmm1 |
445 movdqa XMMWORD[32+rsp],xmm2 | 454 movdqa XMMWORD[32+rsp],xmm2 |
446 movdqa XMMWORD[48+rsp],xmm3 | 455 movdqa XMMWORD[48+rsp],xmm3 |
447 xor rbx,rbx | 456 xor r8,r8 |
448 | 457 |
449 $L$oop_tail_ssse3: | 458 $L$oop_tail_ssse3: |
450 movzx eax,BYTE[rbx*1+rsi] | 459 movzx eax,BYTE[r8*1+rsi] |
451 movzx ecx,BYTE[rbx*1+rsp] | 460 movzx ecx,BYTE[r8*1+rsp] |
452 lea rbx,[1+rbx] | 461 lea r8,[1+r8] |
453 xor eax,ecx | 462 xor eax,ecx |
454 mov BYTE[((-1))+rbx*1+rdi],al | 463 mov BYTE[((-1))+r8*1+rdi],al |
455 dec rdx | 464 dec rdx |
456 jnz NEAR $L$oop_tail_ssse3 | 465 jnz NEAR $L$oop_tail_ssse3 |
457 | 466 |
458 $L$done_ssse3: | 467 $L$done_ssse3: |
459 movaps xmm6,XMMWORD[((64+32))+rsp] | 468 movaps xmm6,XMMWORD[((64+32))+rsp] |
460 movaps xmm7,XMMWORD[((64+48))+rsp] | 469 movaps xmm7,XMMWORD[((64+48))+rsp] |
461 add rsp,64+72 | 470 add rsp,64+72+48 |
462 pop r15 | |
463 pop r14 | |
464 pop r13 | |
465 pop r12 | |
466 pop rbp | |
467 pop rbx | |
468 mov rdi,QWORD[8+rsp] ;WIN64 epilogue | 471 mov rdi,QWORD[8+rsp] ;WIN64 epilogue |
469 mov rsi,QWORD[16+rsp] | 472 mov rsi,QWORD[16+rsp] |
470 DB 0F3h,0C3h ;repret | 473 DB 0F3h,0C3h ;repret |
471 $L$SEH_end_ChaCha20_ssse3: | 474 $L$SEH_end_ChaCha20_ssse3: |
472 | 475 |
473 ALIGN 32 | 476 ALIGN 32 |
474 ChaCha20_4x: | 477 ChaCha20_4x: |
475 mov QWORD[8+rsp],rdi ;WIN64 prologue | 478 mov QWORD[8+rsp],rdi ;WIN64 prologue |
476 mov QWORD[16+rsp],rsi | 479 mov QWORD[16+rsp],rsi |
477 mov rax,rsp | 480 mov rax,rsp |
(...skipping 1202 matching lines...) |
1680 movaps xmm11,XMMWORD[32+r11] | 1683 movaps xmm11,XMMWORD[32+r11] |
1681 movaps xmm12,XMMWORD[48+r11] | 1684 movaps xmm12,XMMWORD[48+r11] |
1682 movaps xmm13,XMMWORD[64+r11] | 1685 movaps xmm13,XMMWORD[64+r11] |
1683 movaps xmm14,XMMWORD[80+r11] | 1686 movaps xmm14,XMMWORD[80+r11] |
1684 movaps xmm15,XMMWORD[96+r11] | 1687 movaps xmm15,XMMWORD[96+r11] |
1685 mov rsp,QWORD[640+rsp] | 1688 mov rsp,QWORD[640+rsp] |
1686 mov rdi,QWORD[8+rsp] ;WIN64 epilogue | 1689 mov rdi,QWORD[8+rsp] ;WIN64 epilogue |
1687 mov rsi,QWORD[16+rsp] | 1690 mov rsi,QWORD[16+rsp] |
1688 DB 0F3h,0C3h ;repret | 1691 DB 0F3h,0C3h ;repret |
1689 $L$SEH_end_ChaCha20_8x: | 1692 $L$SEH_end_ChaCha20_8x: |
OLD | NEW |