OLD | NEW |
1 #if defined(__x86_64__) | 1 #if defined(__x86_64__) |
2 .text | 2 .text |
3 | 3 |
4 | 4 |
5 | 5 |
6 .p2align 6 | 6 .p2align 6 |
7 L$zero: | 7 L$zero: |
8 .long 0,0,0,0 | 8 .long 0,0,0,0 |
9 L$one: | 9 L$one: |
10 .long 1,0,0,0 | 10 .long 1,0,0,0 |
11 L$inc: | 11 L$inc: |
12 .long 0,1,2,3 | 12 .long 0,1,2,3 |
13 L$four: | 13 L$four: |
14 .long 4,4,4,4 | 14 .long 4,4,4,4 |
15 L$incy: | 15 L$incy: |
16 .long 0,2,4,6,1,3,5,7 | 16 .long 0,2,4,6,1,3,5,7 |
17 L$eight: | 17 L$eight: |
18 .long 8,8,8,8,8,8,8,8 | 18 .long 8,8,8,8,8,8,8,8 |
19 L$rot16: | 19 L$rot16: |
20 .byte 0x2,0x3,0x0,0x1, 0x6,0x7,0x4,0x5, 0xa,0xb,0x8,0x9, 0xe,0xf,0xc,0xd | 20 .byte 0x2,0x3,0x0,0x1, 0x6,0x7,0x4,0x5, 0xa,0xb,0x8,0x9, 0xe,0xf,0xc,0xd |
21 L$rot24: | 21 L$rot24: |
22 .byte 0x3,0x0,0x1,0x2, 0x7,0x4,0x5,0x6, 0xb,0x8,0x9,0xa, 0xf,0xc,0xd,0xe | 22 .byte 0x3,0x0,0x1,0x2, 0x7,0x4,0x5,0x6, 0xb,0x8,0x9,0xa, 0xf,0xc,0xd,0xe |
23 L$sigma: | 23 L$sigma: |
24 .byte 101,120,112,97,110,100,32,51,50,45,98,121,116,101,32,107,0 | 24 .byte 101,120,112,97,110,100,32,51,50,45,98,121,116,101,32,107,0 |
| 25 .p2align 6 |
| 26 L$zeroz: |
| 27 .long 0,0,0,0, 1,0,0,0, 2,0,0,0, 3,0,0,0 |
| 28 L$fourz: |
| 29 .long 4,0,0,0, 4,0,0,0, 4,0,0,0, 4,0,0,0 |
| 30 L$incz: |
| 31 .long 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 |
| 32 L$sixteen: |
| 33 .long 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16 |
25 .byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54,95,54,52,44,32,67,
82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110
,115,115,108,46,111,114,103,62,0 | 34 .byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54,95,54,52,44,32,67,
82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110
,115,115,108,46,111,114,103,62,0 |
26 .globl _ChaCha20_ctr32 | 35 .globl _ChaCha20_ctr32 |
27 .private_extern _ChaCha20_ctr32 | 36 .private_extern _ChaCha20_ctr32 |
28 | 37 |
29 .p2align 6 | 38 .p2align 6 |
30 _ChaCha20_ctr32: | 39 _ChaCha20_ctr32: |
31 cmpq $0,%rdx | 40 cmpq $0,%rdx |
32 je L$no_data | 41 je L$no_data |
33 movq _OPENSSL_ia32cap_P+4(%rip),%r10 | 42 movq _OPENSSL_ia32cap_P+4(%rip),%r10 |
34 testl $512,%r10d | 43 testl $512,%r10d |
(...skipping 273 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
308 movdqu (%rcx),%xmm1 | 317 movdqu (%rcx),%xmm1 |
309 movdqu 16(%rcx),%xmm2 | 318 movdqu 16(%rcx),%xmm2 |
310 movdqu (%r8),%xmm3 | 319 movdqu (%r8),%xmm3 |
311 movdqa L$rot16(%rip),%xmm6 | 320 movdqa L$rot16(%rip),%xmm6 |
312 movdqa L$rot24(%rip),%xmm7 | 321 movdqa L$rot24(%rip),%xmm7 |
313 | 322 |
314 movdqa %xmm0,0(%rsp) | 323 movdqa %xmm0,0(%rsp) |
315 movdqa %xmm1,16(%rsp) | 324 movdqa %xmm1,16(%rsp) |
316 movdqa %xmm2,32(%rsp) | 325 movdqa %xmm2,32(%rsp) |
317 movdqa %xmm3,48(%rsp) | 326 movdqa %xmm3,48(%rsp) |
318 » movl» $10,%ebp | 327 » movq» $10,%r8 |
319 jmp L$oop_ssse3 | 328 jmp L$oop_ssse3 |
320 | 329 |
321 .p2align 5 | 330 .p2align 5 |
322 L$oop_outer_ssse3: | 331 L$oop_outer_ssse3: |
323 movdqa L$one(%rip),%xmm3 | 332 movdqa L$one(%rip),%xmm3 |
324 movdqa 0(%rsp),%xmm0 | 333 movdqa 0(%rsp),%xmm0 |
325 movdqa 16(%rsp),%xmm1 | 334 movdqa 16(%rsp),%xmm1 |
326 movdqa 32(%rsp),%xmm2 | 335 movdqa 32(%rsp),%xmm2 |
327 paddd 48(%rsp),%xmm3 | 336 paddd 48(%rsp),%xmm3 |
328 » movl» $10,%ebp | 337 » movq» $10,%r8 |
329 movdqa %xmm3,48(%rsp) | 338 movdqa %xmm3,48(%rsp) |
330 jmp L$oop_ssse3 | 339 jmp L$oop_ssse3 |
331 | 340 |
332 .p2align 5 | 341 .p2align 5 |
333 L$oop_ssse3: | 342 L$oop_ssse3: |
334 paddd %xmm1,%xmm0 | 343 paddd %xmm1,%xmm0 |
335 pxor %xmm0,%xmm3 | 344 pxor %xmm0,%xmm3 |
336 .byte 102,15,56,0,222 | 345 .byte 102,15,56,0,222 |
337 paddd %xmm3,%xmm2 | 346 paddd %xmm3,%xmm2 |
338 pxor %xmm2,%xmm1 | 347 pxor %xmm2,%xmm1 |
(...skipping 28 matching lines...) Expand all Loading... |
367 .byte 102,15,56,0,223 | 376 .byte 102,15,56,0,223 |
368 paddd %xmm3,%xmm2 | 377 paddd %xmm3,%xmm2 |
369 pxor %xmm2,%xmm1 | 378 pxor %xmm2,%xmm1 |
370 movdqa %xmm1,%xmm4 | 379 movdqa %xmm1,%xmm4 |
371 psrld $25,%xmm1 | 380 psrld $25,%xmm1 |
372 pslld $7,%xmm4 | 381 pslld $7,%xmm4 |
373 por %xmm4,%xmm1 | 382 por %xmm4,%xmm1 |
374 pshufd $78,%xmm2,%xmm2 | 383 pshufd $78,%xmm2,%xmm2 |
375 pshufd $147,%xmm1,%xmm1 | 384 pshufd $147,%xmm1,%xmm1 |
376 pshufd $57,%xmm3,%xmm3 | 385 pshufd $57,%xmm3,%xmm3 |
377 » decl» %ebp | 386 » decq» %r8 |
378 jnz L$oop_ssse3 | 387 jnz L$oop_ssse3 |
379 paddd 0(%rsp),%xmm0 | 388 paddd 0(%rsp),%xmm0 |
380 paddd 16(%rsp),%xmm1 | 389 paddd 16(%rsp),%xmm1 |
381 paddd 32(%rsp),%xmm2 | 390 paddd 32(%rsp),%xmm2 |
382 paddd 48(%rsp),%xmm3 | 391 paddd 48(%rsp),%xmm3 |
383 | 392 |
384 cmpq $64,%rdx | 393 cmpq $64,%rdx |
385 jb L$tail_ssse3 | 394 jb L$tail_ssse3 |
386 | 395 |
387 movdqu 0(%rsi),%xmm4 | 396 movdqu 0(%rsi),%xmm4 |
(...skipping 16 matching lines...) Expand all Loading... |
404 jnz L$oop_outer_ssse3 | 413 jnz L$oop_outer_ssse3 |
405 | 414 |
406 jmp L$done_ssse3 | 415 jmp L$done_ssse3 |
407 | 416 |
408 .p2align 4 | 417 .p2align 4 |
409 L$tail_ssse3: | 418 L$tail_ssse3: |
410 movdqa %xmm0,0(%rsp) | 419 movdqa %xmm0,0(%rsp) |
411 movdqa %xmm1,16(%rsp) | 420 movdqa %xmm1,16(%rsp) |
412 movdqa %xmm2,32(%rsp) | 421 movdqa %xmm2,32(%rsp) |
413 movdqa %xmm3,48(%rsp) | 422 movdqa %xmm3,48(%rsp) |
414 » xorq» %rbx,%rbx | 423 » xorq» %r8,%r8 |
415 | 424 |
416 L$oop_tail_ssse3: | 425 L$oop_tail_ssse3: |
417 » movzbl» (%rsi,%rbx,1),%eax | 426 » movzbl» (%rsi,%r8,1),%eax |
418 » movzbl» (%rsp,%rbx,1),%ecx | 427 » movzbl» (%rsp,%r8,1),%ecx |
419 » leaq» 1(%rbx),%rbx | 428 » leaq» 1(%r8),%r8 |
420 xorl %ecx,%eax | 429 xorl %ecx,%eax |
421 » movb» %al,-1(%rdi,%rbx,1) | 430 » movb» %al,-1(%rdi,%r8,1) |
422 decq %rdx | 431 decq %rdx |
423 jnz L$oop_tail_ssse3 | 432 jnz L$oop_tail_ssse3 |
424 | 433 |
425 L$done_ssse3: | 434 L$done_ssse3: |
426 » addq» $64+24,%rsp | 435 » addq» $64+24+48,%rsp |
427 » popq» %r15 | |
428 » popq» %r14 | |
429 » popq» %r13 | |
430 » popq» %r12 | |
431 » popq» %rbp | |
432 » popq» %rbx | |
433 .byte 0xf3,0xc3 | 436 .byte 0xf3,0xc3 |
434 | 437 |
435 | 438 |
436 .p2align 5 | 439 .p2align 5 |
437 ChaCha20_4x: | 440 ChaCha20_4x: |
438 L$ChaCha20_4x: | 441 L$ChaCha20_4x: |
439 movq %r10,%r11 | 442 movq %r10,%r11 |
440 shrq $32,%r10 | 443 shrq $32,%r10 |
441 testq $32,%r10 | 444 testq $32,%r10 |
442 jnz L$ChaCha20_8x | 445 jnz L$ChaCha20_8x |
(...skipping 1132 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1575 movb %al,-1(%rdi,%r10,1) | 1578 movb %al,-1(%rdi,%r10,1) |
1576 decq %rdx | 1579 decq %rdx |
1577 jnz L$oop_tail8x | 1580 jnz L$oop_tail8x |
1578 | 1581 |
1579 L$done8x: | 1582 L$done8x: |
1580 vzeroall | 1583 vzeroall |
1581 movq 640(%rsp),%rsp | 1584 movq 640(%rsp),%rsp |
1582 .byte 0xf3,0xc3 | 1585 .byte 0xf3,0xc3 |
1583 | 1586 |
1584 #endif | 1587 #endif |
OLD | NEW |