OLD | NEW |
(Empty) | |
| 1 ; This Source Code Form is subject to the terms of the Mozilla Public |
| 2 ; License, v. 2.0. If a copy of the MPL was not distributed with this |
| 3 ; file, You can obtain one at http://mozilla.org/MPL/2.0/. |
| 4 |
| 5 ; ** ARCFOUR implementation optimized for AMD64. |
| 6 ; ** |
| 7 ; ** The throughput achieved by this code is about 320 MBytes/sec, on |
| 8 ; ** a 1.8 GHz AMD Opteron (rev C0) processor. |
| 9 |
| 10 .CODE |
| 11 |
| 12 ; extern void ARCFOUR(RC4Context *cx, unsigned long long inputLen, |
| 13 ; const unsigned char *input, unsigned char *output); |
| 14 |
| 15 |
;-----------------------------------------------------------------------
; ARCFOUR -- generate the RC4 keystream and XOR it over a buffer.
;
; ABI:   Microsoft x64
; In:    rcx = cx        context: qword x at +0, qword y at +8, state
;                        table d[] at +16 (one 8-byte slot per entry)
;        rdx = inputLen  number of bytes to process
;        r8  = input     source buffer
;        r9  = output    destination buffer
; Out:   output[0 .. inputLen) = input XOR keystream; d[] is permuted in
;        place; the low bytes of cx->x and cx->y are written back.
; Clobb: rax, rcx, rdx, r8, r9, r11, flags
; Saved: rbp, rbx, rsi, rdi (pushed in the prolog and described to the
;        unwinder through PROC FRAME / .pushreg)
;
; Register roles once setup is done:
;   rbp = d (state table base)    rsi = in cursor     rdi = out cursor
;   rcx = x (already advanced)    rdx = y             rax = tx = d[x]
;   r9  = end marker: in+len-8 during the block loop, in+len in the tail
;   rbx = scratch (ty, then val)  r8  = keystream accumulator
;   r11 = per-block byte counter
;
; The pointer comparisons below are signed (jl / jle). They give the
; expected result as long as `in` and `in+len` lie on the same side of
; the sign boundary.
;-----------------------------------------------------------------------
ARCFOUR PROC FRAME

        ; Prolog. This routine modifies rsp and nonvolatile registers, so
        ; it is not a leaf function and must publish unwind data: every
        ; push is paired with its .pushreg, and .endprolog ends the prolog.
        push    rbp
        .pushreg rbp
        push    rbx
        .pushreg rbx
        push    rsi
        .pushreg rsi
        push    rdi
        .pushreg rdi
        .endprolog

        mov     rbp, rcx                ; key = ARG(key)
        mov     rbx, rdx                ; rbx = ARG(len)
        mov     rsi, r8                 ; in = ARG(in)
        mov     rdi, r9                 ; out = ARG(out)
        mov     rcx, [rbp]              ; x = key->x
        mov     rdx, [rbp+8]            ; y = key->y
        add     rbp, 16                 ; d = key->data
        inc     rcx                     ; x++
        and     rcx, 0ffh               ; x &= 0xff
        lea     rbx, [rbx+rsi-8]        ; rbx = in+len-8
        mov     r9, rbx                 ; tmp = in+len-8 (rbx becomes scratch below)
        mov     rax, [rbp+rcx*8]        ; tx = d[x]
        cmp     rbx, rsi                ; cmp in+len-8 with in
        jl      Lend                    ; fewer than 8 bytes: go straight to the tail

Lstart:
        add     rsi, 8                  ; increment in  (block is addressed as [rsi-8])
        add     rdi, 8                  ; increment out (block is addressed as [rdi-8])

        ;
        ; generate the next 8 bytes of the rc4 stream into r8
        ;

        mov     r11, 8                  ; byte counter

@@:
        add     dl, al                  ; y += tx
        mov     ebx, [rbp+rdx*8]        ; ty = d[y]
        mov     [rbp+rcx*8], ebx        ; d[x] = ty
        add     bl, al                  ; val = ty + tx
        mov     [rbp+rdx*8], eax        ; d[y] = tx
        inc     cl                      ; x++ (NEXT ROUND), wraps at 256
        mov     eax, [rbp+rcx*8]        ; tx = d[x] (NEXT ROUND)
        mov     r8b, [rbp+rbx*8]        ; val = d[val]
        dec     r11b                    ; sets ZF for the jnz below
        ror     r8, 8                   ; (ror does not change ZF); after 8
                                        ; rotations the first byte is lowest
        jnz     @b

        ;
        ; xor 8 bytes
        ;

        xor     r8, [rsi-8]
        cmp     rsi, r9                 ; cmp in with in+len-8
        mov     [rdi-8], r8             ; mov leaves the cmp flags intact
        jle     Lstart                  ; another full 8-byte block remains

Lend:
        add     r9, 8                   ; tmp = in+len

        ;
        ; handle the last bytes, one by one
        ;

@@:
        cmp     r9, rsi                 ; cmp in+len with in
        jle     Lfinished               ; jump if (in+len <= in)
        add     dl, al                  ; y += tx
        mov     ebx, [rbp+rdx*8]        ; ty = d[y]
        mov     [rbp+rcx*8], ebx        ; d[x] = ty
        add     bl, al                  ; val = ty + tx
        mov     [rbp+rdx*8], eax        ; d[y] = tx
        inc     cl                      ; x++ (NEXT ROUND)
        mov     eax, [rbp+rcx*8]        ; tx = d[x] (NEXT ROUND)
        mov     r8b, [rbp+rbx*8]        ; val = d[val]
        xor     r8b, [rsi]              ; xor 1 byte
        mov     [rdi], r8b
        inc     rsi                     ; in++
        inc     rdi                     ; out++
        jmp     @b

Lfinished:
        dec     rcx                     ; x-- (undo the advance made for the next round)
        mov     [rbp-8], dl             ; key->y = y (low byte only)
        mov     [rbp-16], cl            ; key->x = x (low byte only)

        ; Epilog: pops in reverse push order followed directly by ret.
        pop     rdi
        pop     rsi
        pop     rbx
        pop     rbp
        ret

ARCFOUR ENDP
| 106 |
| 107 END |
OLD | NEW |