Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(264)

Side by Side Diff: mozilla/security/nss/lib/freebl/mpi/mpi_amd64_masm.asm

Issue 11738002: Include 64-bit optimized assembly on Windows when building x64 (Closed) Base URL: http://src.chromium.org/svn/trunk/deps/third_party/nss
Patch Set: Update checkout script Created 7 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 ; This Source Code Form is subject to the terms of the Mozilla Public
2 ; License, v. 2.0. If a copy of the MPL was not distributed with this
3 ; file, You can obtain one at http://mozilla.org/MPL/2.0/.
4
5 ;
6 ; This code is converted from mpi_amd64_gas.asm for MASM for x64.
7 ;
8
9 ; ------------------------------------------------------------------------
10 ;
11 ; Implementation of s_mpv_mul_set_vec which exploits
12 ; the 64X64->128 bit unsigned multiply instruction.
13 ;
14 ; ------------------------------------------------------------------------
15
16 ; r = a * digit, r and a are vectors of length len
17 ; returns the carry digit
18 ; r and a are 64 bit aligned.
19 ;
20 ; uint64_t
21 ; s_mpv_mul_set_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
22 ;
23
24 .CODE
25
s_mpv_mul_set_vec64 PROC FRAME

; ABI:    Microsoft x64 (MASM / ml64 syntax)
; In:     rcx = r, rdx = a, r8d = len, r9 = digit
; Out:    rax = carry digit (0 when len == 0)
; Clobb:  rcx, rdx, r8, r9, r11, flags  (rdi/rsi are saved and restored)
;
; The body was converted from the GAS version, which expects its
; arguments in rdi, rsi, rdx, rcx.  The incoming Windows parameter
; registers are therefore shuffled into those registers first.
;
; Register roles after the shuffle:
;   rdi = r (destination cursor)     rsi = a (source cursor)
;   rcx = digit                      r8  = digits remaining
;   r9  = running carry              r11 = next a[] digit, loaded early
;   rdx:rax = 128-bit product of the current digit (written by mul)

        ; rdi and rsi are callee-saved in the Microsoft x64 convention,
        ; so they are preserved here and described in the unwind data.
        push    rdi
        .pushreg rdi
        push    rsi
        .pushreg rsi
        .endprolog

        mov     rdi, rcx                ; rdi = r
        mov     rsi, rdx                ; rsi = a
        mov     edx, r8d                ; rdx = len, zero-extended to 64 bits
                                        ; (a negative len is not handled)
        mov     rcx, r9                 ; rcx = digit

        ; The carry must be cleared BEFORE the empty-vector exit: r9 still
        ; holds 'digit' at this point and L17 returns r9.
        xor     r9, r9                  ; cy = 0
        test    rdx, rdx
        jz      L17                     ; if (len == 0) return 0
        mov     r8, rdx                 ; r8 = len; rdx is overwritten by mul

; ---- main loop: eight digits per iteration -----------------------------
L15:
        cmp     r8, 8
        jb      L16                     ; fewer than 8 left -> tail code

        mov     rax, [0+rsi]            ; rax = a[0]
        mov     r11, [8+rsi]            ; prefetch a[1]
        mul     rcx                     ; rdx:rax = a[0] * digit
        add     rax, r9                 ; lo += cy
        adc     rdx, 0                  ; hi += carry out of lo
        mov     [0+rdi], rax            ; r[0] = lo
        mov     r9, rdx                 ; cy = hi

        mov     rax, r11                ; rax = a[1]
        mov     r11, [16+rsi]           ; prefetch a[2]
        mul     rcx
        add     rax, r9
        adc     rdx, 0
        mov     [8+rdi], rax            ; r[1]
        mov     r9, rdx

        mov     rax, r11                ; rax = a[2]
        mov     r11, [24+rsi]           ; prefetch a[3]
        mul     rcx
        add     rax, r9
        adc     rdx, 0
        mov     [16+rdi], rax           ; r[2]
        mov     r9, rdx

        mov     rax, r11                ; rax = a[3]
        mov     r11, [32+rsi]           ; prefetch a[4]
        mul     rcx
        add     rax, r9
        adc     rdx, 0
        mov     [24+rdi], rax           ; r[3]
        mov     r9, rdx

        mov     rax, r11                ; rax = a[4]
        mov     r11, [40+rsi]           ; prefetch a[5]
        mul     rcx
        add     rax, r9
        adc     rdx, 0
        mov     [32+rdi], rax           ; r[4]
        mov     r9, rdx

        mov     rax, r11                ; rax = a[5]
        mov     r11, [48+rsi]           ; prefetch a[6]
        mul     rcx
        add     rax, r9
        adc     rdx, 0
        mov     [40+rdi], rax           ; r[5]
        mov     r9, rdx

        mov     rax, r11                ; rax = a[6]
        mov     r11, [56+rsi]           ; prefetch a[7]
        mul     rcx
        add     rax, r9
        adc     rdx, 0
        mov     [48+rdi], rax           ; r[6]
        mov     r9, rdx

        mov     rax, r11                ; rax = a[7] (nothing left to prefetch)
        mul     rcx
        add     rax, r9
        adc     rdx, 0
        mov     [56+rdi], rax           ; r[7]
        mov     r9, rdx

        add     rsi, 64                 ; a += 8
        add     rdi, 64                 ; r += 8
        sub     r8, 8                   ; len -= 8
        jz      L17
        jmp     L15

; ---- tail: 1..7 digits remain (r8 is non-zero and below 8 here) --------
L16:
        mov     rax, [0+rsi]
        mul     rcx
        add     rax, r9
        adc     rdx, 0
        mov     [0+rdi], rax
        mov     r9, rdx
        dec     r8
        jz      L17

        mov     rax, [8+rsi]
        mul     rcx
        add     rax, r9
        adc     rdx, 0
        mov     [8+rdi], rax
        mov     r9, rdx
        dec     r8
        jz      L17

        mov     rax, [16+rsi]
        mul     rcx
        add     rax, r9
        adc     rdx, 0
        mov     [16+rdi], rax
        mov     r9, rdx
        dec     r8
        jz      L17

        mov     rax, [24+rsi]
        mul     rcx
        add     rax, r9
        adc     rdx, 0
        mov     [24+rdi], rax
        mov     r9, rdx
        dec     r8
        jz      L17

        mov     rax, [32+rsi]
        mul     rcx
        add     rax, r9
        adc     rdx, 0
        mov     [32+rdi], rax
        mov     r9, rdx
        dec     r8
        jz      L17

        mov     rax, [40+rsi]
        mul     rcx
        add     rax, r9
        adc     rdx, 0
        mov     [40+rdi], rax
        mov     r9, rdx
        dec     r8
        jz      L17

        ; Seventh and last possible tail digit: execution continues into
        ; L17 unconditionally, so no further count/branch is needed.
        mov     rax, [48+rsi]
        mul     rcx
        add     rax, r9
        adc     rdx, 0
        mov     [48+rdi], rax
        mov     r9, rdx

; ---- exit: return the carry --------------------------------------------
L17:
        mov     rax, r9                 ; return cy
        pop     rsi
        pop     rdi
        ret

s_mpv_mul_set_vec64 ENDP
175
176
177 ;------------------------------------------------------------------------
178 ;
179 ; Implementation of s_mpv_mul_add_vec which exploits
180 ; the 64X64->128 bit unsigned multiply instruction.
181 ;
182 ;------------------------------------------------------------------------
183
184 ; r += a * digit, r and a are vectors of length len
185 ; returns the carry digit
186 ; r and a are 64 bit aligned.
187 ;
188 ; uint64_t
189 ; s_mpv_mul_add_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
190 ;
191
s_mpv_mul_add_vec64 PROC FRAME

; ABI:    Microsoft x64 (MASM / ml64 syntax)
; In:     rcx = r, rdx = a, r8d = len, r9 = digit
; Out:    rax = carry digit (0 when len == 0)
; Clobb:  rcx, rdx, r8, r9, r10, r11, flags  (rdi/rsi are saved and restored)
;
; The body was converted from the GAS version, which expects its
; arguments in rdi, rsi, rdx, rcx.  The incoming Windows parameter
; registers are therefore shuffled into those registers first.
;
; Register roles after the shuffle:
;   rdi = r (accumulator cursor)     rsi = a (source cursor)
;   rcx = digit                      r8  = digits remaining
;   r9  = running carry              r10 = current r[] digit
;   r11 = next a[] digit, loaded early
;   rdx:rax = 128-bit product of the current digit (written by mul)
;
; Per digit: rdx:rax = a[i] * digit + r[i] + cy; the low half is stored
; to r[i] and the high half becomes the next cy.

        ; rdi and rsi are callee-saved in the Microsoft x64 convention,
        ; so they are preserved here and described in the unwind data.
        push    rdi
        .pushreg rdi
        push    rsi
        .pushreg rsi
        .endprolog

        mov     rdi, rcx                ; rdi = r
        mov     rsi, rdx                ; rsi = a
        mov     edx, r8d                ; rdx = len, zero-extended to 64 bits
                                        ; (a negative len is not handled)
        mov     rcx, r9                 ; rcx = digit

        ; The carry must be cleared BEFORE the empty-vector exit: r9 still
        ; holds 'digit' at this point and L27 returns r9.
        xor     r9, r9                  ; cy = 0
        test    rdx, rdx
        jz      L27                     ; if (len == 0) return 0
        mov     r8, rdx                 ; r8 = len; rdx is overwritten by mul

; ---- main loop: eight digits per iteration -----------------------------
L25:
        cmp     r8, 8
        jb      L26                     ; fewer than 8 left -> tail code

        mov     rax, [0+rsi]            ; rax = a[0]
        mov     r10, [0+rdi]            ; r10 = r[0]
        mov     r11, [8+rsi]            ; prefetch a[1]
        mul     rcx                     ; rdx:rax = a[0] * digit
        add     rax, r10                ; lo += r[0]
        adc     rdx, 0
        mov     r10, [8+rdi]            ; prefetch r[1]
        add     rax, r9                 ; lo += cy
        adc     rdx, 0
        mov     [0+rdi], rax            ; r[0] = lo
        mov     r9, rdx                 ; cy = hi

        mov     rax, r11                ; rax = a[1]
        mov     r11, [16+rsi]           ; prefetch a[2]
        mul     rcx
        add     rax, r10
        adc     rdx, 0
        mov     r10, [16+rdi]           ; prefetch r[2]
        add     rax, r9
        adc     rdx, 0
        mov     [8+rdi], rax            ; r[1]
        mov     r9, rdx

        mov     rax, r11                ; rax = a[2]
        mov     r11, [24+rsi]           ; prefetch a[3]
        mul     rcx
        add     rax, r10
        adc     rdx, 0
        mov     r10, [24+rdi]           ; prefetch r[3]
        add     rax, r9
        adc     rdx, 0
        mov     [16+rdi], rax           ; r[2]
        mov     r9, rdx

        mov     rax, r11                ; rax = a[3]
        mov     r11, [32+rsi]           ; prefetch a[4]
        mul     rcx
        add     rax, r10
        adc     rdx, 0
        mov     r10, [32+rdi]           ; prefetch r[4]
        add     rax, r9
        adc     rdx, 0
        mov     [24+rdi], rax           ; r[3]
        mov     r9, rdx

        mov     rax, r11                ; rax = a[4]
        mov     r11, [40+rsi]           ; prefetch a[5]
        mul     rcx
        add     rax, r10
        adc     rdx, 0
        mov     r10, [40+rdi]           ; prefetch r[5]
        add     rax, r9
        adc     rdx, 0
        mov     [32+rdi], rax           ; r[4]
        mov     r9, rdx

        mov     rax, r11                ; rax = a[5]
        mov     r11, [48+rsi]           ; prefetch a[6]
        mul     rcx
        add     rax, r10
        adc     rdx, 0
        mov     r10, [48+rdi]           ; prefetch r[6]
        add     rax, r9
        adc     rdx, 0
        mov     [40+rdi], rax           ; r[5]
        mov     r9, rdx

        mov     rax, r11                ; rax = a[6]
        mov     r11, [56+rsi]           ; prefetch a[7]
        mul     rcx
        add     rax, r10
        adc     rdx, 0
        mov     r10, [56+rdi]           ; prefetch r[7]
        add     rax, r9
        adc     rdx, 0
        mov     [48+rdi], rax           ; r[6]
        mov     r9, rdx

        mov     rax, r11                ; rax = a[7] (nothing left to prefetch)
        mul     rcx
        add     rax, r10
        adc     rdx, 0
        add     rax, r9
        adc     rdx, 0
        mov     [56+rdi], rax           ; r[7]
        mov     r9, rdx

        add     rsi, 64                 ; a += 8
        add     rdi, 64                 ; r += 8
        sub     r8, 8                   ; len -= 8
        jz      L27
        jmp     L25

; ---- tail: 1..7 digits remain (r8 is non-zero and below 8 here) --------
L26:
        mov     rax, [0+rsi]
        mov     r10, [0+rdi]
        mul     rcx
        add     rax, r10
        adc     rdx, 0
        add     rax, r9
        adc     rdx, 0
        mov     [0+rdi], rax
        mov     r9, rdx
        dec     r8
        jz      L27

        mov     rax, [8+rsi]
        mov     r10, [8+rdi]
        mul     rcx
        add     rax, r10
        adc     rdx, 0
        add     rax, r9
        adc     rdx, 0
        mov     [8+rdi], rax
        mov     r9, rdx
        dec     r8
        jz      L27

        mov     rax, [16+rsi]
        mov     r10, [16+rdi]
        mul     rcx
        add     rax, r10
        adc     rdx, 0
        add     rax, r9
        adc     rdx, 0
        mov     [16+rdi], rax
        mov     r9, rdx
        dec     r8
        jz      L27

        mov     rax, [24+rsi]
        mov     r10, [24+rdi]
        mul     rcx
        add     rax, r10
        adc     rdx, 0
        add     rax, r9
        adc     rdx, 0
        mov     [24+rdi], rax
        mov     r9, rdx
        dec     r8
        jz      L27

        mov     rax, [32+rsi]
        mov     r10, [32+rdi]
        mul     rcx
        add     rax, r10
        adc     rdx, 0
        add     rax, r9
        adc     rdx, 0
        mov     [32+rdi], rax
        mov     r9, rdx
        dec     r8
        jz      L27

        mov     rax, [40+rsi]
        mov     r10, [40+rdi]
        mul     rcx
        add     rax, r10
        adc     rdx, 0
        add     rax, r9
        adc     rdx, 0
        mov     [40+rdi], rax
        mov     r9, rdx
        dec     r8
        jz      L27

        ; Seventh and last possible tail digit: execution continues into
        ; L27 unconditionally, so no further count/branch is needed.
        mov     rax, [48+rsi]
        mov     r10, [48+rdi]
        mul     rcx
        add     rax, r10
        adc     rdx, 0
        add     rax, r9
        adc     rdx, 0
        mov     [48+rdi], rax
        mov     r9, rdx

; ---- exit: return the carry --------------------------------------------
L27:
        mov     rax, r9                 ; return cy
        pop     rsi
        pop     rdi
        ret

s_mpv_mul_add_vec64 ENDP
387
388 END
OLDNEW
« no previous file with comments | « mozilla/security/nss/lib/freebl/mpi/mp_comba_amd64_masm.asm ('k') | nss.gyp » ('j') | nss.gyp » ('J')

Powered by Google App Engine
This is Rietveld 408576698