Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(38)

Side by Side Diff: mozilla/security/nss/lib/freebl/arcfour-amd64-masm.asm

Issue 11738002: Include 64-bit optimized assembly on Windows when building x64 (Closed) Base URL: http://src.chromium.org/svn/trunk/deps/third_party/nss
Patch Set: Update checkout script Created 7 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | mozilla/security/nss/lib/freebl/mpi/mp_comba_amd64_masm.asm » ('j') | nss.gyp » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 ; This Source Code Form is subject to the terms of the Mozilla Public
2 ; License, v. 2.0. If a copy of the MPL was not distributed with this
3 ; file, You can obtain one at http://mozilla.org/MPL/2.0/.
4
5 ; ** ARCFOUR implementation optimized for AMD64.
6 ; **
7 ; ** The throughput achieved by this code is about 320 MBytes/sec, on
8 ; ** a 1.8 GHz AMD Opteron (rev C0) processor.
9
10 .CODE
11
12 ; extern void ARCFOUR(RC4Context *cx, unsigned long long inputLen,
13 ; const unsigned char *input, unsigned char *output);
14
15
;-----------------------------------------------------------------------
; void ARCFOUR(RC4Context *cx, unsigned long long inputLen,
;              const unsigned char *input, unsigned char *output);
;
; ABI:    Microsoft x64 (MASM / ml64 syntax)
; In:     rcx = cx, rdx = inputLen, r8 = input, r9 = output
; Out:    nothing returned; inputLen bytes are written to output and
;         the low bytes of cx->x and cx->y are updated
; Saved:  rbp, rbx, rsi, rdi (pushed in the prolog, popped before ret)
; Clobb:  rax, rcx, rdx, r8, r9, r11, flags
;
; Context layout as accessed by this routine:
;         [cx+0]        x     (loaded as a qword, low byte stored back)
;         [cx+8]        y     (loaded as a qword, low byte stored back)
;         [cx+16+i*8]   d[i]  (state table, 8-byte stride per entry)
;
; Register roles after setup:
;         rbp = d (table base)      rsi = in        rdi = out
;         rcx = x                   rdx = y         rax = tx = d[x]
;         rbx = ty / val scratch    r8  = keystream accumulator
;         r9  = in+len-8 (later in+len)             r11 = byte counter
;
; The routine saves nonvolatile registers, so it is a frame function and
; is declared PROC FRAME with .pushreg/.endprolog so that unwind data is
; emitted for it.  It makes no calls, so the stack is not realigned.
;
; NOTE(review): the pointer comparisons below are signed (jl/jle), as in
; the original code; this assumes `in` and `in+len` do not straddle the
; signed boundary -- confirm before changing to unsigned branches.
;-----------------------------------------------------------------------
ARCFOUR PROC FRAME

        push    rbp
        .pushreg rbp
        push    rbx
        .pushreg rbx
        push    rsi
        .pushreg rsi
        push    rdi
        .pushreg rdi
        .endprolog

        mov     rbp, rcx                ; key = ARG(key)
        mov     rbx, rdx                ; rbx = ARG(len)
        mov     rsi, r8                 ; in = ARG(in)
        mov     rdi, r9                 ; out = ARG(out)
        mov     rcx, [rbp]              ; x = key->x
        mov     rdx, [rbp+8]            ; y = key->y
        add     rbp, 16                 ; d = key->data
        inc     rcx                     ; x++
        and     rcx, 0ffh               ; x &= 0xff
        lea     rbx, [rbx+rsi-8]        ; rbx = in+len-8
        mov     r9, rbx                 ; tmp = in+len-8 (rbx is reused as scratch below)
        mov     rax, [rbp+rcx*8]        ; tx = d[x]
        cmp     rbx, rsi                ; cmp in with in+len-8
        jl      Lend                    ; fewer than 8 bytes: go straight to the byte loop

Lstart:
        add     rsi, 8                  ; advance in; this group is addressed as [rsi-8]
        add     rdi, 8                  ; advance out; this group is addressed as [rdi-8]

        ;
        ; generate the next 8 bytes of the rc4 stream into r8
        ;

        mov     r11, 8                  ; byte counter

@@:
        add     dl, al                  ; y += tx (byte add wraps mod 256)
        mov     ebx, [rbp+rdx*8]        ; ty = d[y]
        mov     [rbp+rcx*8], ebx        ; d[x] = ty
        add     bl, al                  ; val = ty + tx
        mov     [rbp+rdx*8], eax        ; d[y] = tx
        inc     cl                      ; x++ (NEXT ROUND)
        mov     eax, [rbp+rcx*8]        ; tx = d[x] (NEXT ROUND)
        mov     r8b, [rbp+rbx*8]        ; val = d[val], placed in the low byte of r8
        dec     r11b                    ; sets ZF for the jnz below
        ror     r8, 8                   ; rotate byte into place (ror does not change ZF);
                                        ; after 8 rounds the first byte is the low byte
        jnz     @b

        ;
        ; xor 8 bytes
        ;

        xor     r8, [rsi-8]
        cmp     rsi, r9                 ; cmp in+len-8 with in
        mov     [rdi-8], r8             ; mov leaves the flags from cmp intact
        jle     Lstart                  ; loop while at least 8 more bytes remain

Lend:
        add     r9, 8                   ; tmp = in+len

        ;
        ; handle the last bytes, one by one
        ;

@@:
        cmp     r9, rsi                 ; cmp in with in+len
        jle     Lfinished               ; jump if (in+len <= in)
        add     dl, al                  ; y += tx
        mov     ebx, [rbp+rdx*8]        ; ty = d[y]
        mov     [rbp+rcx*8], ebx        ; d[x] = ty
        add     bl, al                  ; val = ty + tx
        mov     [rbp+rdx*8], eax        ; d[y] = tx
        inc     cl                      ; x++ (NEXT ROUND)
        mov     eax, [rbp+rcx*8]        ; tx = d[x] (NEXT ROUND)
        mov     r8b, [rbp+rbx*8]        ; val = d[val]
        xor     r8b, [rsi]              ; xor 1 byte
        mov     [rdi], r8b
        inc     rsi                     ; in++
        inc     rdi                     ; out++
        jmp     @b

Lfinished:
        dec     rcx                     ; x-- (undo the look-ahead increment)
        mov     [rbp-8], dl             ; key->y = y (low byte only)
        mov     [rbp-16], cl            ; key->x = x (low byte only)

        pop     rdi
        pop     rsi
        pop     rbx
        pop     rbp
        ret

ARCFOUR ENDP
106
107 END
OLDNEW
« no previous file with comments | « no previous file | mozilla/security/nss/lib/freebl/mpi/mp_comba_amd64_masm.asm » ('j') | nss.gyp » ('J')

Powered by Google App Engine
This is Rietveld 408576698