OLD | NEW |
| (Empty) |
1 # This file contains a pre-compiled version of chacha_vec.c for ARM. This is | |
2 # needed to support switching on NEON code at runtime. If the whole of OpenSSL | |
3 # were to be compiled with the needed flags to build chacha_vec.c, then it | |
4 # wouldn't be possible to run on non-NEON systems. | |
5 # | |
6 # This file was generated by: | |
7 # | |
8 # /opt/gcc-linaro-arm-linux-gnueabihf-4.7-2012.10-20121022_linux/bin/arm-linux-gnueabihf-gcc -O3 -mcpu=cortex-a8 -mfpu=neon -S chacha_vec.c -I ../../include -fpic -o chacha_vec_arm.S | |
9 # | |
10 # And then EABI attribute 28 was set to zero to allow linking with soft-float | |
11 # code. | |
12 | |
# Assembler setup and EABI build attributes as emitted by the compiler:
# unified syntax, Cortex-A8 target, NEON FPU, Thumb encoding, .text section.
# Attribute 28 carries the value 0 here; the file header explains that it was
# set to zero by hand so that the object can be linked with soft-float code.
13 .syntax unified | |
14 .cpu cortex-a8 | |
15 .eabi_attribute 27, 3 | |
16 .eabi_attribute 28, 0 | |
17 .fpu neon | |
18 .eabi_attribute 20, 1 | |
19 .eabi_attribute 21, 1 | |
20 .eabi_attribute 23, 3 | |
21 .eabi_attribute 24, 1 | |
22 .eabi_attribute 25, 1 | |
23 .eabi_attribute 26, 2 | |
24 .eabi_attribute 30, 2 | |
25 .eabi_attribute 34, 1 | |
26 .eabi_attribute 18, 4 | |
27 .thumb | |
28 .file "chacha_vec.c" | |
29 .text | |
30 .align 2 | |
# CRYPTO_chacha_20_neon: exported Thumb function, compiled from chacha_vec.c
# (see file header).
#
# Inputs as used by the code below (names are not visible in this file;
# presumably out / in / length / key / nonce / counter - confirm against
# the C prototype):
#   r0  saved at [r7, 188]; later used as the store (output) pointer
#   r1  saved at [r7, 184]; later used as the load (input) pointer
#   r2  saved at [r7, 196]; byte count
#   r3  saved at [r7, 244]; pointer to 32 bytes loaded into q12/q13
#   [r7, 400]  first stack argument: pointer, two words are read from it
#   [r7, 404]  second stack argument: word used as the first state-row-3 lane
# (400 = 304 bytes of frame + 64 bytes of d8-d15 + 32 bytes of r4-fp.)
#
# Register roles established here and kept for the whole function:
#   q11 = four constant words from .rodata     q12, q13 = the 32 bytes at r3
#   q10 = { [r7,404], 0, ptr[0], ptr[1] }      q9 = { 1, 0, 0, 0 }
#   [r7, 200] = 16-byte aligned scratch buffer carved out of the stack
31 .global CRYPTO_chacha_20_neon | |
32 .thumb | |
33 .thumb_func | |
34 .type CRYPTO_chacha_20_neon, %function | |
35 CRYPTO_chacha_20_neon: | |
36 @ args = 8, pretend = 0, frame = 304 | |
37 @ frame_needed = 1, uses_anonymous_args = 0 | |
38 @ link register save eliminated. | |
# Prologue: save callee-saved core registers and d8-d15, then reserve the
# 304-byte frame addressed through r7.
39 push {r4, r5, r6, r7, r8, r9, sl, fp} | |
40 fstmfdd sp!, {d8, d9, d10, d11, d12, d13, d14, d15} | |
41 sub sp, sp, #304 | |
42 add r7, sp, #0 | |
# Load ip with 0xAAAAAAAB. The umull plus "lsrs ip, ip, #7" further down take
# the high word of r2 * 0xAAAAAAAB shifted right by 7, i.e. r2 / 192: the
# number of 192-byte chunks handled by the main loop.
43 movw ip, #43691 | |
44 movt ip, 43690 | |
45 str r2, [r7, #196] | |
# Reserve 96 more bytes below the frame; r4 is rounded up to a 16-byte
# boundary inside that area (add 15, clear low 4 bits) and kept at [r7, 200].
46 sub sp, sp, #96 | |
47 ldr r4, [r7, #196] | |
48 ldr r6, [r7, #400] | |
49 ldr r2, .L38+16 | |
50 umull r4, ip, ip, r4 | |
51 ldr r6, [r6, #0] | |
52 ldr r8, [r7, #400] | |
# PIC address computation: r2 becomes the address of .LANCHOR0 in .rodata.
53 .LPIC24: | |
54 add r2, pc | |
55 add r4, sp, #15 | |
56 str r3, [r7, #244] | |
57 str r6, [r7, #176] | |
58 bic r4, r4, #15 | |
59 str r0, [r7, #188] | |
60 str r4, [r7, #200] | |
# This lsrs sets the flags tested by the "beq .L20" below; none of the
# instructions in between update the flags.
61 lsrs ip, ip, #7 | |
62 str r1, [r7, #184] | |
63 ldmia r2, {r0, r1, r2, r3} | |
64 ldr r4, [r8, #4] | |
65 ldr r5, [r7, #244] | |
# Load the 32 bytes at the r3 argument into q12 (d24-d25) and q13 (d26-d27).
# NOTE(review): the ":64" qualifier asserts that this caller-supplied pointer
# is 8-byte aligned - confirm that every caller guarantees it.
66 vld1.64 {d24-d25}, [r5:64] | |
67 vldr d26, [r5, #16] | |
68 vldr d27, [r5, #24] | |
69 ldr r9, [r7, #200] | |
70 ldr r8, [r7, #404] | |
71 ldr r5, [r7, #176] | |
72 add r6, r9, #64 | |
# Build the last state row in the frame at [r7, 288..300] and copy the four
# constant words to scratch+64, then pick both up as q10 and q11.
73 str r4, [r7, #300] | |
74 mov r4, #0 | |
75 str r8, [r7, #288] | |
76 str r5, [r7, #296] | |
77 str r4, [r7, #292] | |
78 stmia r6, {r0, r1, r2, r3} | |
79 vldr d22, [r9, #64] | |
80 vldr d23, [r9, #72] | |
81 vldr d20, [r7, #288] | |
82 vldr d21, [r7, #296] | |
83 str ip, [r7, #192] | |
# Zero 192-byte chunks: skip the main loop entirely.
84 beq .L20 | |
# Main-loop invariants kept in the frame:
#   [r7, 180] = 2 * chunks        [r7, 204] = 3 * chunks (loop end value)
#   [r7, 208] = [r7, 404] + 2     [r7, 240] = 0 (running block count)
#   [r7, 224/220/216/212] = constant words 0..3
#   [r7, 252] = load pointer      [r7, 248] = store pointer
85 lsl r6, ip, #1 | |
86 ldr r1, [r9, #68] | |
87 add r3, r6, ip | |
88 str r6, [r7, #180] | |
89 ldr r2, [r9, #72] | |
90 add r8, r8, #2 | |
91 ldr r5, [r9, #76] | |
92 vldr d18, .L38 | |
93 vldr d19, .L38+8 | |
94 str r4, [r7, #240] | |
95 ldr r6, [r7, #184] | |
96 ldr r4, [r7, #188] | |
97 str r0, [r7, #224] | |
98 str r1, [r7, #220] | |
99 str r8, [r7, #208] | |
100 str r2, [r7, #216] | |
101 str r3, [r7, #204] | |
102 str r5, [r7, #212] | |
103 str r6, [r7, #252] | |
104 str r4, [r7, #248] | |
# .L4: top of the main loop; one pass produces 192 bytes of output.
# Three working copies of the state are prepared:
#   NEON copy 1: q4, q0, q1, q5  (from q11, q12, q13, q10)
#   NEON copy 2: q2, q3, q15, q8 (from q11, q12, q13, q10 + q9)
#   scalar copy: core registers plus frame slots, filled from the words at
#                [r7, 244], the constants at [r7, 212..224], the two words
#                behind [r7, 400], and [r7, 240] + [r7, 208] as the first
#                lane of the last row.
# The round counter [r7, 256] is set to 10.
105 .L4: | |
106 ldr r2, [r7, #244] | |
107 add r9, r7, #216 | |
108 ldr r3, [r7, #244] | |
109 vadd.i32 q8, q10, q9 | |
110 ldr r6, [r7, #208] | |
111 vmov q15, q13 @ v4si | |
112 ldr r5, [r7, #240] | |
113 vmov q3, q12 @ v4si | |
114 ldr r4, [r7, #244] | |
115 vmov q2, q11 @ v4si | |
116 adds r5, r5, r6 | |
117 ldr r2, [r2, #8] | |
118 ldr r6, [r7, #400] | |
119 vmov q5, q10 @ v4si | |
120 ldr r3, [r3, #12] | |
121 vmov q1, q13 @ v4si | |
122 ldr r0, [r7, #244] | |
123 vmov q0, q12 @ v4si | |
124 ldr r1, [r7, #244] | |
125 vmov q4, q11 @ v4si | |
126 ldmia r9, {r9, sl, fp} | |
127 str r5, [r7, #228] | |
128 ldr r5, [r4, #24] | |
129 ldr r0, [r0, #0] | |
130 ldr r1, [r1, #4] | |
131 str r2, [r7, #264] | |
132 str r3, [r7, #236] | |
133 ldr r2, [r6, #4] | |
134 ldr r3, [r4, #28] | |
135 str r5, [r7, #280] | |
136 ldr r5, [r6, #0] | |
137 movs r6, #0 | |
138 ldr ip, [r7, #228] | |
139 ldr r8, [r7, #212] | |
140 str r0, [r7, #232] | |
141 str r1, [r7, #268] | |
142 ldr r0, [r4, #16] | |
143 ldr r1, [r4, #20] | |
144 movs r4, #10 | |
145 str r2, [r7, #24] | |
146 str r3, [r7, #284] | |
147 str r4, [r7, #256] | |
148 ldr r2, [r7, #264] | |
149 str r9, [r7, #276] | |
150 mov r9, r6 | |
151 ldr r6, [r7, #280] | |
152 str r8, [r7, #260] | |
153 mov r8, sl | |
154 str r1, [r7, #272] | |
155 mov sl, ip | |
156 str r6, [r7, #264] | |
157 mov r6, r5 | |
158 ldr r3, [r7, #236] | |
159 mov r5, r0 | |
160 ldr ip, [r7, #24] | |
161 ldr r1, [r7, #268] | |
162 ldr r0, [r7, #232] | |
# Jump over the literal pool that the compiler placed inside the function.
163 b .L39 | |
# Literal pool: .L38 holds the vector { 1, 0, 0, 0 } (loaded into q9), and
# .L38+16 holds the PC-relative offset of .LANCHOR0 used at .LPIC24.
164 .L40: | |
165 .align 3 | |
166 .L38: | |
167 .word 1 | |
168 .word 0 | |
169 .word 0 | |
170 .word 0 | |
171 .word .LANCHOR0-(.LPIC24+4) | |
172 .L39: | |
# .L3: inner round loop, executed 10 times (counter in [r7, 256]).
# Each iteration applies the add / xor / rotate sequence twice to every state
# copy: once on the rows as they are, then once more after the rows have been
# rotated by the vext.32 instructions. NEON and scalar instructions for the
# three copies are interleaved by the compiler.
# Rotation amounts used:
#   NEON:   vrev32.16 (16), vshl 12 + vsri 20, vshl 8 + vsri 24,
#           vshl 7 + vsri 25
#   scalar: ror 16, ror 20, ror 24, ror 25
# The scalar copy does not fit in the core registers, so values are spilled to
# and reloaded from frame slots [r7, 24] and [r7, 232..284] throughout.
# Statement order matters here: do not reorder.
173 .L3: | |
174 vadd.i32 q4, q4, q0 | |
175 add r8, r8, r1 | |
176 vadd.i32 q2, q2, q3 | |
177 str r8, [r7, #268] | |
178 veor q5, q5, q4 | |
179 ldr r8, [r7, #276] | |
180 veor q8, q8, q2 | |
181 add fp, fp, r0 | |
182 str fp, [r7, #280] | |
183 add r8, r8, r2 | |
184 vrev32.16 q5, q5 | |
185 str r8, [r7, #276] | |
186 vrev32.16 q8, q8 | |
187 vadd.i32 q1, q1, q5 | |
188 vadd.i32 q15, q15, q8 | |
189 ldr r8, [r7, #280] | |
190 veor q0, q1, q0 | |
191 ldr r4, [r7, #260] | |
192 veor q3, q15, q3 | |
193 eor sl, sl, r8 | |
194 ldr r8, [r7, #276] | |
195 add fp, r4, r3 | |
196 vshl.i32 q7, q0, #12 | |
197 ldr r4, [r7, #268] | |
198 vshl.i32 q6, q3, #12 | |
199 eor r6, r6, r8 | |
200 eor r9, r9, r4 | |
201 ldr r4, [r7, #272] | |
202 vsri.32 q7, q0, #20 | |
203 ror r8, r6, #16 | |
204 ldr r6, [r7, #264] | |
205 eor ip, ip, fp | |
206 vsri.32 q6, q3, #20 | |
207 ror sl, sl, #16 | |
208 ror r9, r9, #16 | |
209 add r5, r5, sl | |
210 vadd.i32 q4, q4, q7 | |
211 str r5, [r7, #236] | |
212 vadd.i32 q2, q2, q6 | |
213 add r5, r4, r9 | |
214 add r4, r6, r8 | |
215 ldr r6, [r7, #284] | |
216 ror ip, ip, #16 | |
217 veor q5, q4, q5 | |
218 veor q8, q2, q8 | |
219 add r6, r6, ip | |
220 str r6, [r7, #264] | |
221 eors r1, r1, r5 | |
222 ldr r6, [r7, #236] | |
223 vshl.i32 q3, q5, #8 | |
224 vshl.i32 q14, q8, #8 | |
225 eors r2, r2, r4 | |
226 eors r0, r0, r6 | |
227 ldr r6, [r7, #264] | |
228 vsri.32 q3, q5, #24 | |
229 ror r1, r1, #20 | |
230 eors r3, r3, r6 | |
231 ldr r6, [r7, #280] | |
232 ror r0, r0, #20 | |
233 vsri.32 q14, q8, #24 | |
234 adds r6, r0, r6 | |
235 str r6, [r7, #284] | |
236 ldr r6, [r7, #268] | |
237 vadd.i32 q1, q1, q3 | |
238 vadd.i32 q15, q15, q14 | |
239 ror r2, r2, #20 | |
240 adds r6, r1, r6 | |
241 str r6, [r7, #260] | |
242 ldr r6, [r7, #276] | |
243 veor q6, q15, q6 | |
244 veor q7, q1, q7 | |
245 ror r3, r3, #20 | |
246 adds r6, r2, r6 | |
247 str r6, [r7, #280] | |
248 ldr r6, [r7, #284] | |
249 vshl.i32 q0, q6, #7 | |
250 vshl.i32 q5, q7, #7 | |
251 add fp, r3, fp | |
252 eor sl, r6, sl | |
253 ldr r6, [r7, #260] | |
254 eor ip, fp, ip | |
255 vsri.32 q0, q6, #25 | |
256 eor r9, r6, r9 | |
257 ldr r6, [r7, #280] | |
258 ror sl, sl, #24 | |
259 vsri.32 q5, q7, #25 | |
260 eor r8, r6, r8 | |
261 ldr r6, [r7, #236] | |
262 ror r9, r9, #24 | |
263 ror ip, ip, #24 | |
264 add r6, sl, r6 | |
265 str r6, [r7, #276] | |
266 ldr r6, [r7, #264] | |
267 add r5, r9, r5 | |
268 str r5, [r7, #272] | |
# First half done for the NEON copies: the vext.32 instructions from here on
# rotate the rows (by 1, 2 and 3 lanes) ahead of the second half.
269 vext.32 q5, q5, q5, #1 | |
270 add r5, ip, r6 | |
271 ldr r6, [r7, #276] | |
272 vext.32 q0, q0, q0, #1 | |
273 vadd.i32 q4, q4, q5 | |
274 eors r0, r0, r6 | |
275 ldr r6, [r7, #272] | |
276 vadd.i32 q2, q2, q0 | |
277 vext.32 q3, q3, q3, #3 | |
278 ror r8, r8, #24 | |
279 eors r1, r1, r6 | |
280 vext.32 q14, q14, q14, #3 | |
281 add r4, r8, r4 | |
282 ldr r6, [r7, #284] | |
283 veor q3, q4, q3 | |
284 veor q14, q2, q14 | |
285 eors r2, r2, r4 | |
286 ror r1, r1, #25 | |
287 vext.32 q1, q1, q1, #2 | |
288 adds r6, r1, r6 | |
289 str r6, [r7, #284] | |
290 vext.32 q15, q15, q15, #2 | |
291 ldr r6, [r7, #260] | |
292 eors r3, r3, r5 | |
293 ror r2, r2, #25 | |
294 vrev32.16 q8, q14 | |
295 adds r6, r2, r6 | |
296 vrev32.16 q3, q3 | |
297 str r6, [r7, #268] | |
298 vadd.i32 q1, q1, q3 | |
299 ldr r6, [r7, #280] | |
300 vadd.i32 q15, q15, q8 | |
301 ror r3, r3, #25 | |
302 veor q5, q1, q5 | |
303 adds r6, r3, r6 | |
304 veor q0, q15, q0 | |
305 str r6, [r7, #264] | |
306 ldr r6, [r7, #268] | |
307 ror r0, r0, #25 | |
308 add fp, r0, fp | |
309 vshl.i32 q6, q5, #12 | |
310 eor sl, r6, sl | |
311 ldr r6, [r7, #284] | |
312 vshl.i32 q14, q0, #12 | |
313 eor r8, fp, r8 | |
314 eor ip, r6, ip | |
315 ldr r6, [r7, #264] | |
316 vsri.32 q6, q5, #20 | |
317 ror sl, sl, #16 | |
318 eor r9, r6, r9 | |
319 ror r6, r8, #16 | |
320 vsri.32 q14, q0, #20 | |
321 ldr r8, [r7, #272] | |
322 ror ip, ip, #16 | |
323 add r5, sl, r5 | |
324 add r8, r6, r8 | |
325 add r4, ip, r4 | |
326 str r4, [r7, #236] | |
327 eor r0, r8, r0 | |
328 str r5, [r7, #280] | |
329 vadd.i32 q4, q4, q6 | |
330 ldr r5, [r7, #236] | |
331 vadd.i32 q2, q2, q14 | |
332 ldr r4, [r7, #276] | |
333 ror r0, r0, #20 | |
334 veor q3, q4, q3 | |
335 eors r1, r1, r5 | |
336 veor q0, q2, q8 | |
337 str r8, [r7, #272] | |
338 str r0, [r7, #24] | |
339 add fp, r0, fp | |
340 ldr r8, [r7, #280] | |
341 ror r9, r9, #16 | |
342 ldr r0, [r7, #284] | |
343 add r4, r9, r4 | |
344 str fp, [r7, #260] | |
345 ror r1, r1, #20 | |
346 add fp, r1, r0 | |
347 eor r2, r8, r2 | |
348 ldr r0, [r7, #260] | |
349 eors r3, r3, r4 | |
350 vshl.i32 q5, q3, #8 | |
351 str r4, [r7, #232] | |
352 vshl.i32 q8, q0, #8 | |
353 ldr r4, [r7, #268] | |
354 ldr r5, [r7, #264] | |
355 ror r2, r2, #20 | |
356 ror r3, r3, #20 | |
357 eors r6, r6, r0 | |
358 adds r5, r3, r5 | |
359 add r8, r2, r4 | |
360 vsri.32 q5, q3, #24 | |
361 ldr r4, [r7, #272] | |
362 eor r9, r5, r9 | |
363 eor ip, fp, ip | |
364 vsri.32 q8, q0, #24 | |
365 eor sl, r8, sl | |
366 ror r6, r6, #24 | |
367 ldr r0, [r7, #280] | |
368 str r5, [r7, #276] | |
369 adds r4, r6, r4 | |
370 ldr r5, [r7, #236] | |
371 vadd.i32 q1, q1, q5 | |
372 str r4, [r7, #272] | |
373 vadd.i32 q15, q15, q8 | |
374 ldr r4, [r7, #232] | |
375 ror ip, ip, #24 | |
376 ror sl, sl, #24 | |
377 ror r9, r9, #24 | |
378 add r5, ip, r5 | |
379 add r0, sl, r0 | |
380 str r5, [r7, #264] | |
381 add r5, r9, r4 | |
382 str r0, [r7, #284] | |
383 veor q6, q1, q6 | |
384 ldr r4, [r7, #24] | |
385 veor q14, q15, q14 | |
386 ldr r0, [r7, #272] | |
387 eors r3, r3, r5 | |
388 vshl.i32 q0, q6, #7 | |
389 vext.32 q1, q1, q1, #2 | |
390 eors r0, r0, r4 | |
391 ldr r4, [r7, #284] | |
392 str r0, [r7, #280] | |
393 vshl.i32 q3, q14, #7 | |
394 eors r2, r2, r4 | |
395 ldr r4, [r7, #280] | |
396 ldr r0, [r7, #264] | |
397 vsri.32 q0, q6, #25 | |
398 ror r2, r2, #25 | |
399 ror r3, r3, #25 | |
400 eors r1, r1, r0 | |
401 vsri.32 q3, q14, #25 | |
402 ror r0, r4, #25 | |
403 ldr r4, [r7, #256] | |
404 ror r1, r1, #25 | |
405 vext.32 q5, q5, q5, #1 | |
# Decrement the round counter. The flags from this subs are consumed by the
# "bne .L3" below; the str and vext instructions in between leave them alone.
406 subs r4, r4, #1 | |
407 str r4, [r7, #256] | |
408 vext.32 q15, q15, q15, #2 | |
409 vext.32 q8, q8, q8, #1 | |
410 vext.32 q0, q0, q0, #3 | |
411 vext.32 q3, q3, q3, #3 | |
412 bne .L3 | |
# Output stage of the 192-byte loop (falls through from the round loop).
# Every working copy gets the starting state added back, is XORed with the
# bytes at the load pointer [r7, 252], and is written through the store
# pointer [r7, 248]:
#   bytes   0..63   NEON copy 1 (q4, q0, q1, q10 + q5)
#   bytes  64..127  NEON copy 2 (q2, q3, q15, q8; last row uses q14 = q10 + q9)
#   bytes 128..191  scalar copy, one word at a time
# Bookkeeping for the next pass: [r7, 240] += 3, q10 = q10 + 3 * q9, and both
# pointers advance by 192. The loop repeats until [r7, 240] == [r7, 204].
413 ldr r4, [r7, #264] | |
414 vadd.i32 q14, q10, q9 | |
415 str r2, [r7, #264] | |
416 vadd.i32 q10, q10, q5 | |
417 ldr r2, [r7, #252] | |
# NOTE(review): the ":64" qualifiers on the load at [r2] here and on the
# store at [r4] further down assert 8-byte alignment of the caller-supplied
# buffers - confirm that callers guarantee it.
418 vld1.64 {d12-d13}, [r2:64] | |
419 ldr r2, [r7, #220] | |
420 vadd.i32 q4, q11, q4 | |
421 str ip, [r7, #24] | |
422 mov ip, sl | |
423 mov sl, r8 | |
424 ldr r8, [r7, #260] | |
425 add sl, sl, r2 | |
426 ldr r2, [r7, #212] | |
427 str r4, [r7, #280] | |
428 vadd.i32 q0, q12, q0 | |
429 ldr r4, [r7, #224] | |
430 add r8, r8, r2 | |
431 ldr r2, [r7, #240] | |
432 vadd.i32 q1, q13, q1 | |
433 str r0, [r7, #232] | |
434 add fp, fp, r4 | |
435 mov r0, r5 | |
436 ldr r4, [r7, #216] | |
437 mov r5, r6 | |
438 mov r6, r9 | |
439 ldr r9, [r7, #276] | |
440 adds r2, r2, #3 | |
441 str r2, [r7, #240] | |
442 vadd.i32 q2, q11, q2 | |
443 ldr r2, [r7, #252] | |
444 add r9, r9, r4 | |
445 vadd.i32 q3, q12, q3 | |
446 ldr r4, [r7, #228] | |
447 vadd.i32 q15, q13, q15 | |
448 str r1, [r7, #268] | |
449 vadd.i32 q8, q14, q8 | |
450 str r3, [r7, #236] | |
451 veor q4, q4, q6 | |
452 ldr r3, [r7, #284] | |
453 ldr r1, [r7, #272] | |
454 add ip, r4, ip | |
455 ldr r4, [r7, #248] | |
# From here r2 = load pointer and r4 = store pointer for bytes 0..127.
456 vst1.64 {d8-d9}, [r4:64] | |
457 vldr d8, [r2, #16] | |
458 vldr d9, [r2, #24] | |
459 veor q0, q0, q4 | |
460 vstr d0, [r4, #16] | |
461 vstr d1, [r4, #24] | |
462 vldr d0, [r2, #32] | |
463 vldr d1, [r2, #40] | |
464 veor q1, q1, q0 | |
465 vstr d2, [r4, #32] | |
466 vstr d3, [r4, #40] | |
467 vldr d2, [r2, #48] | |
468 vldr d3, [r2, #56] | |
469 veor q10, q10, q1 | |
470 vstr d20, [r4, #48] | |
471 vstr d21, [r4, #56] | |
472 vldr d8, [r2, #64] | |
473 vldr d9, [r2, #72] | |
474 veor q2, q2, q4 | |
475 vstr d4, [r4, #64] | |
476 vstr d5, [r4, #72] | |
477 vldr d10, [r2, #80] | |
478 vldr d11, [r2, #88] | |
479 veor q3, q3, q5 | |
480 vstr d6, [r4, #80] | |
481 vstr d7, [r4, #88] | |
482 vldr d12, [r2, #96] | |
483 vldr d13, [r2, #104] | |
484 veor q15, q15, q6 | |
485 vstr d30, [r4, #96] | |
486 vstr d31, [r4, #104] | |
487 vldr d20, [r2, #112] | |
488 vldr d21, [r2, #120] | |
489 veor q8, q8, q10 | |
490 vstr d16, [r4, #112] | |
491 vstr d17, [r4, #120] | |
# Scalar copy: words at offsets 128..188. q10 is rebuilt as q14 + q9 + q9,
# which equals the old q10 + 3 * q9.
492 ldr r4, [r2, #128] | |
493 ldr r2, [r7, #248] | |
494 vadd.i32 q10, q14, q9 | |
495 eor r4, fp, r4 | |
496 vadd.i32 q10, q10, q9 | |
497 str r4, [r2, #128] | |
498 ldr r4, [r7, #252] | |
499 ldr r2, [r4, #132] | |
500 eor r2, sl, r2 | |
501 ldr sl, [r7, #248] | |
502 str r2, [sl, #132] | |
503 ldr r2, [r4, #136] | |
504 eor r2, r9, r2 | |
505 str r2, [sl, #136] | |
506 ldr r2, [r4, #140] | |
507 eor r2, r8, r2 | |
508 str r2, [sl, #140] | |
509 ldr r2, [r7, #244] | |
510 ldr r4, [r4, #144] | |
511 ldr r2, [r2, #0] | |
512 str r4, [r7, #44] | |
513 ldr r4, [r7, #232] | |
514 add r8, r4, r2 | |
515 ldr r2, [r7, #44] | |
516 ldr r4, [r7, #244] | |
517 eor r8, r8, r2 | |
518 ldr r2, [r7, #252] | |
519 str r8, [sl, #144] | |
520 ldr r4, [r4, #4] | |
521 ldr r2, [r2, #148] | |
522 str r2, [r7, #40] | |
523 ldr r2, [r7, #268] | |
524 add r8, r2, r4 | |
525 ldr r4, [r7, #40] | |
526 ldr r2, [r7, #244] | |
527 eor r8, r8, r4 | |
528 ldr r4, [r7, #252] | |
529 str r8, [sl, #148] | |
530 ldr r2, [r2, #8] | |
531 ldr r4, [r4, #152] | |
532 str r4, [r7, #36] | |
533 ldr r4, [r7, #264] | |
534 add r8, r4, r2 | |
535 ldr r2, [r7, #36] | |
536 eor r8, r8, r2 | |
537 str r8, [sl, #152] | |
538 ldr r2, [r7, #252] | |
539 ldr r4, [r7, #244] | |
540 ldr r2, [r2, #156] | |
541 ldr r4, [r4, #12] | |
542 str r2, [r7, #32] | |
543 ldr r2, [r7, #236] | |
544 add r8, r2, r4 | |
545 ldr r4, [r7, #32] | |
546 ldr r2, [r7, #252] | |
547 eor r8, r8, r4 | |
548 str r8, [sl, #156] | |
549 ldr r8, [r7, #244] | |
550 ldr r2, [r2, #160] | |
551 ldr r4, [r8, #16] | |
552 adds r0, r0, r4 | |
553 ldr r4, [r7, #252] | |
554 eors r0, r0, r2 | |
555 str r0, [sl, #160] | |
556 ldr r0, [r8, #20] | |
557 ldr r2, [r4, #164] | |
558 adds r1, r1, r0 | |
559 ldr r0, [r7, #280] | |
560 eors r1, r1, r2 | |
561 str r1, [sl, #164] | |
562 ldr r2, [r8, #24] | |
563 ldr r1, [r4, #168] | |
564 adds r2, r0, r2 | |
565 eors r2, r2, r1 | |
566 str r2, [sl, #168] | |
567 ldr r1, [r8, #28] | |
568 ldr r2, [r4, #172] | |
569 adds r3, r3, r1 | |
570 eors r3, r3, r2 | |
571 str r3, [sl, #172] | |
572 ldr r3, [r4, #176] | |
573 eor r3, ip, r3 | |
574 str r3, [sl, #176] | |
575 ldr r3, [r4, #180] | |
576 ldr r4, [r7, #400] | |
577 eors r6, r6, r3 | |
578 str r6, [sl, #180] | |
579 ldr r6, [r7, #252] | |
580 ldr r2, [r4, #0] | |
581 ldr r3, [r6, #184] | |
582 adds r5, r5, r2 | |
583 eors r5, r5, r3 | |
584 str r5, [sl, #184] | |
585 ldr r2, [r6, #188] | |
# Advance the load pointer by 192 and write it back.
586 adds r6, r6, #192 | |
587 ldr r3, [r4, #4] | |
588 str r6, [r7, #252] | |
589 ldr r0, [r7, #24] | |
590 ldr r1, [r7, #240] | |
591 adds r4, r0, r3 | |
592 eors r4, r4, r2 | |
593 ldr r2, [r7, #204] | |
594 str r4, [sl, #188] | |
# Advance the store pointer by 192; compare the running block count with the
# end value. The str after the cmp does not touch the flags.
595 add sl, sl, #192 | |
596 cmp r1, r2 | |
597 str sl, [r7, #248] | |
598 bne .L4 | |
# Main loop finished: r5 = ([r7, 180] + [r7, 192]) << 6, i.e. 192 * chunks;
# r4 = original r0 + that offset, r5 = original r1 + that offset.
599 ldr r4, [r7, #192] | |
600 ldr r3, [r7, #180] | |
601 ldr r6, [r7, #188] | |
602 adds r5, r3, r4 | |
603 ldr r8, [r7, #184] | |
604 lsls r5, r5, #6 | |
605 adds r4, r6, r5 | |
606 add r5, r8, r5 | |
# .L2: whole 64-byte blocks left over after the 192-byte loop.
# On entry r4 = current store pointer, r5 = current load pointer.
# The byte count [r7, 196] is divided by 192 again (same 0xAAAAAAAB multiply),
# the remainder count - 192 * quotient is formed in r3, and r6 = r3 >> 6 is
# the number of blocks to process. Each block uses a single NEON state copy.
607 .L2: | |
608 ldr r9, [r7, #196] | |
609 movw r3, #43691 | |
610 movt r3, 43690 | |
611 ldr sl, [r7, #196] | |
612 umull r9, r3, r3, r9 | |
613 lsrs r3, r3, #7 | |
614 add r3, r3, r3, lsl #1 | |
615 sub r3, sl, r3, lsl #6 | |
616 lsrs r6, r3, #6 | |
617 beq .L5 | |
# r1 / r2 run 16 bytes ahead of the load / store position so the four
# 16-byte groups sit at offsets -16, 0, +16 and +32. r0 counts blocks.
# q15 = { 1, 0, 0, 0 } from the second literal pool.
618 add r1, r5, #16 | |
619 add r2, r4, #16 | |
620 mov r0, r6 | |
621 vldr d30, .L41 | |
622 vldr d31, .L41+8 | |
# .L6: per-block setup - working rows q3, q14, q1, q8 are copied from
# q11, q12, q13, q10; r3 = 10 round-loop iterations.
623 .L6: | |
624 vmov q8, q10 @ v4si | |
625 movs r3, #10 | |
626 vmov q1, q13 @ v4si | |
627 vmov q14, q12 @ v4si | |
628 vmov q3, q11 @ v4si | |
# .L7: round loop, same structure as .L3 but for one NEON copy only.
# The subs at the top sets the flags for the bne at the bottom; only NEON
# instructions sit in between.
629 .L7: | |
630 vadd.i32 q3, q3, q14 | |
631 subs r3, r3, #1 | |
632 veor q2, q8, q3 | |
633 vrev32.16 q2, q2 | |
634 vadd.i32 q8, q1, q2 | |
635 veor q9, q8, q14 | |
636 vshl.i32 q14, q9, #12 | |
637 vsri.32 q14, q9, #20 | |
638 vadd.i32 q3, q3, q14 | |
639 veor q2, q3, q2 | |
640 vshl.i32 q9, q2, #8 | |
641 vsri.32 q9, q2, #24 | |
642 vadd.i32 q8, q8, q9 | |
643 vext.32 q9, q9, q9, #3 | |
644 veor q14, q8, q14 | |
645 vext.32 q1, q8, q8, #2 | |
646 vshl.i32 q8, q14, #7 | |
647 vsri.32 q8, q14, #25 | |
648 vext.32 q8, q8, q8, #1 | |
649 vadd.i32 q3, q3, q8 | |
650 veor q2, q3, q9 | |
651 vrev32.16 q2, q2 | |
652 vadd.i32 q9, q1, q2 | |
653 veor q8, q9, q8 | |
654 vshl.i32 q14, q8, #12 | |
655 vsri.32 q14, q8, #20 | |
656 vadd.i32 q3, q3, q14 | |
657 veor q2, q3, q2 | |
658 vshl.i32 q8, q2, #8 | |
659 vsri.32 q8, q2, #24 | |
660 vadd.i32 q9, q9, q8 | |
661 vext.32 q8, q8, q8, #1 | |
662 veor q14, q9, q14 | |
663 vext.32 q1, q9, q9, #2 | |
664 vshl.i32 q9, q14, #7 | |
665 vsri.32 q9, q14, #25 | |
666 vext.32 q14, q9, q9, #3 | |
667 bne .L7 | |
# Add the starting state back, XOR with 64 loaded bytes, store 64 bytes,
# then bump the first lane of q10 via q15 and step both pointers by 64.
# The subs on r0 sets the flags for "bne .L6"; the two plain add
# instructions near the end do not modify them.
# NOTE(review): the ":64" qualifiers on [r1] and [r2] assert 8-byte alignment
# of the caller-supplied buffers - confirm that callers guarantee it.
668 vadd.i32 q8, q10, q8 | |
669 subs r0, r0, #1 | |
670 vadd.i32 q3, q11, q3 | |
671 vldr d0, [r1, #-16] | |
672 vldr d1, [r1, #-8] | |
673 vadd.i32 q14, q12, q14 | |
674 vadd.i32 q1, q13, q1 | |
675 veor q3, q3, q0 | |
676 vstr d6, [r2, #-16] | |
677 vstr d7, [r2, #-8] | |
678 vadd.i32 q10, q10, q15 | |
679 vld1.64 {d8-d9}, [r1:64] | |
680 veor q14, q14, q4 | |
681 vst1.64 {d28-d29}, [r2:64] | |
682 vldr d10, [r1, #16] | |
683 vldr d11, [r1, #24] | |
684 veor q1, q1, q5 | |
685 vstr d2, [r2, #16] | |
686 vstr d3, [r2, #24] | |
687 vldr d18, [r1, #32] | |
688 vldr d19, [r1, #40] | |
689 add r1, r1, #64 | |
690 veor q8, q8, q9 | |
691 vstr d16, [r2, #32] | |
692 vstr d17, [r2, #40] | |
693 add r2, r2, #64 | |
694 bne .L6 | |
# Advance r4 / r5 past the blocks just written (r6 blocks * 64 bytes).
695 lsls r6, r6, #6 | |
696 adds r4, r4, r6 | |
697 adds r5, r5, r6 | |
# .L5: final partial block. ip = byte count & 63; nothing to do when zero.
# Otherwise one more block is computed with a single NEON copy (.L10, ten
# iterations) into q15, q9, q14, q8. Whole 16-byte groups are XORed directly
# (code at .L37); the first group not fully covered is written, with the
# starting state added, to the aligned scratch buffer at [r7, 200], and the
# remaining bytes are then XORed one by one from it (.L25).
698 .L5: | |
699 ldr r6, [r7, #196] | |
700 ands ip, r6, #63 | |
701 beq .L1 | |
702 vmov q8, q10 @ v4si | |
703 movs r3, #10 | |
704 vmov q14, q13 @ v4si | |
705 vmov q9, q12 @ v4si | |
706 vmov q15, q11 @ v4si | |
# .L10: round loop; flags for the closing bne come from the subs at the top.
707 .L10: | |
708 vadd.i32 q15, q15, q9 | |
709 subs r3, r3, #1 | |
710 veor q8, q8, q15 | |
711 vrev32.16 q8, q8 | |
712 vadd.i32 q3, q14, q8 | |
713 veor q9, q3, q9 | |
714 vshl.i32 q14, q9, #12 | |
715 vsri.32 q14, q9, #20 | |
716 vadd.i32 q15, q15, q14 | |
717 veor q9, q15, q8 | |
718 vshl.i32 q8, q9, #8 | |
719 vsri.32 q8, q9, #24 | |
720 vadd.i32 q9, q3, q8 | |
721 vext.32 q8, q8, q8, #3 | |
722 veor q2, q9, q14 | |
723 vext.32 q14, q9, q9, #2 | |
724 vshl.i32 q9, q2, #7 | |
725 vsri.32 q9, q2, #25 | |
726 vext.32 q9, q9, q9, #1 | |
727 vadd.i32 q15, q15, q9 | |
728 veor q3, q15, q8 | |
729 vrev32.16 q3, q3 | |
730 vadd.i32 q14, q14, q3 | |
731 veor q8, q14, q9 | |
732 vshl.i32 q9, q8, #12 | |
733 vsri.32 q9, q8, #20 | |
734 vadd.i32 q15, q15, q9 | |
735 veor q3, q15, q3 | |
736 vshl.i32 q8, q3, #8 | |
737 vsri.32 q8, q3, #24 | |
738 vadd.i32 q14, q14, q8 | |
739 vext.32 q8, q8, q8, #1 | |
740 veor q3, q14, q9 | |
741 vext.32 q14, q14, q14, #2 | |
742 vshl.i32 q9, q3, #7 | |
743 vsri.32 q9, q3, #25 | |
744 vext.32 q9, q9, q9, #3 | |
745 bne .L10 | |
# q11 = first 16 bytes of the block (constants + working row). With 16 or
# more bytes left go to .L37; otherwise park q11 in the scratch buffer.
# The vadd between cmp and bhi does not affect the flags.
746 cmp ip, #15 | |
747 vadd.i32 q11, q11, q15 | |
748 bhi .L37 | |
749 ldr r9, [r7, #200] | |
750 vst1.64 {d22-d23}, [r9:128] | |
# .L14: r3 = byte count & 48 = bytes of this block already written in whole
# 16-byte groups. Done when ip <= r3. Otherwise r0 / r1 point at the first
# unprocessed load / store byte and r9 = ip - r3 is the number left.
751 .L14: | |
752 ldr sl, [r7, #196] | |
753 and r3, sl, #48 | |
754 cmp ip, r3 | |
755 bls .L1 | |
756 adds r0, r5, r3 | |
757 adds r1, r4, r3 | |
758 add r2, r0, #16 | |
759 add r6, r1, #16 | |
# Compiler-generated guard for a 16-bytes-at-a-time loop: needs the two
# 16-byte windows not to overlap and more than 15 bytes remaining.
# NOTE(review): r9 = (count & 63) - (count & 48) = count & 15, so the
# "cmp r9, #15" test always takes the ls arm, r2 ends up 1 after the eor, and
# the cbnz always branches to .L35; the .L17 loop looks unreachable.
760 cmp r1, r2 | |
761 it cc | |
762 cmpcc r0, r6 | |
763 rsb r9, r3, ip | |
764 ite cc | |
765 movcc r2, #0 | |
766 movcs r2, #1 | |
767 cmp r9, #15 | |
768 ite ls | |
769 movls r2, #0 | |
770 andhi r2, r2, #1 | |
771 lsr r8, r9, #4 | |
772 eor r2, r2, #1 | |
773 cmp r8, #0 | |
774 it eq | |
775 orreq r2, r2, #1 | |
776 lsl sl, r8, #4 | |
777 cbnz r2, .L35 | |
778 ldr fp, [r7, #200] | |
779 add r6, fp, r3 | |
# .L17: 16-byte XOR loop over load pointer r0, scratch r6, store pointer r1.
780 .L17: | |
781 vld1.8 {q8}, [r0]! | |
782 adds r2, r2, #1 | |
783 cmp r8, r2 | |
784 vld1.8 {q9}, [r6]! | |
785 veor q8, q9, q8 | |
786 vst1.8 {q8}, [r1]! | |
787 bhi .L17 | |
788 cmp r9, sl | |
789 add r3, r3, sl | |
790 beq .L1 | |
# .L35 / .L25: byte loop - store[r3] = load[r3] XOR scratch[r3] for
# r3 up to ip - 1 (r4 = store base, r5 = load base, r0 = scratch base).
791 .L35: | |
792 ldr r0, [r7, #200] | |
793 .L25: | |
794 ldrb r2, [r5, r3] @ zero_extendqisi2 | |
795 ldrb r1, [r3, r0] @ zero_extendqisi2 | |
796 eors r2, r2, r1 | |
797 strb r2, [r4, r3] | |
798 adds r3, r3, #1 | |
799 cmp ip, r3 | |
800 bhi .L25 | |
# .L1: common exit. Drop the frame (sp = r7 + 304, which also discards the
# extra 96-byte scratch area), restore d8-d15 and r4-fp, return through lr.
801 .L1: | |
802 add r7, r7, #304 | |
803 mov sp, r7 | |
804 fldmfdd sp!, {d8, d9, d10, d11, d12, d13, d14, d15} | |
805 pop {r4, r5, r6, r7, r8, r9, sl, fp} | |
806 bx lr | |
# .L37: partial block with at least 16 bytes (ip > 15).
# Bytes 0..15 are XORed with q11 and stored. Then, by remaining length:
#   ip <= 31 : second 16 bytes (q12 + q9) go to scratch+16        (.L12)
#   ip <= 47 : bytes 16..31 are XORed and stored; third 16 bytes
#              (q13 + q14) go to scratch+32                       (.L13)
#   else     : bytes 16..47 are XORed and stored; fourth 16 bytes
#              (q8 + q10) go to scratch+48
# Each cmp is followed only by NEON loads / stores / arithmetic before its
# bls, so the flags are still valid at the branch.
# NOTE(review): the ":64" qualifiers on [r5] and [r4] assert 8-byte alignment
# of the current load / store positions - confirm that callers guarantee it.
807 .L37: | |
808 cmp ip, #31 | |
809 vld1.64 {d0-d1}, [r5:64] | |
810 vadd.i32 q9, q12, q9 | |
811 veor q11, q11, q0 | |
812 vst1.64 {d22-d23}, [r4:64] | |
813 bls .L12 | |
814 cmp ip, #47 | |
815 vldr d2, [r5, #16] | |
816 vldr d3, [r5, #24] | |
817 vadd.i32 q13, q13, q14 | |
818 veor q9, q9, q1 | |
819 vstr d18, [r4, #16] | |
820 vstr d19, [r4, #24] | |
821 bls .L13 | |
822 vadd.i32 q8, q8, q10 | |
823 vldr d0, [r5, #32] | |
824 vldr d1, [r5, #40] | |
825 ldr r6, [r7, #200] | |
826 vstr d16, [r6, #48] | |
827 vstr d17, [r6, #56] | |
828 veor q8, q13, q0 | |
829 vstr d16, [r4, #32] | |
830 vstr d17, [r4, #40] | |
831 b .L14 | |
832 .L12: | |
833 ldr r8, [r7, #200] | |
834 vstr d18, [r8, #16] | |
835 vstr d19, [r8, #24] | |
836 b .L14 | |
# .L20: entered when there is no 192-byte chunk; load r5 / r4 with the
# unmodified load / store pointers and continue with the 64-byte stage.
837 .L20: | |
838 ldr r5, [r7, #184] | |
839 ldr r4, [r7, #188] | |
840 b .L2 | |
841 .L13: | |
842 ldr r6, [r7, #200] | |
843 vstr d26, [r6, #32] | |
844 vstr d27, [r6, #40] | |
845 b .L14 | |
# Second literal pool: .L41 = { 1, 0, 0, 0 }, loaded into q15 at .L2.
846 .L42: | |
847 .align 3 | |
848 .L41: | |
849 .word 1 | |
850 .word 0 | |
851 .word 0 | |
852 .word 0 | |
853 .size CRYPTO_chacha_20_neon, .-CRYPTO_chacha_20_neon | |
# Read-only data: the four 32-bit words that the function loads through
# .LANCHOR0 at .LPIC24 and uses as the first state row (q11).
# In hex they are 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574, which are
# the little-endian encodings of the ASCII text "expand 32-byte k".
854 .section .rodata | |
855 .align 3 | |
856 .LANCHOR0 = . + 0 | |
857 .LC0: | |
858 .word 1634760805 | |
859 .word 857760878 | |
860 .word 2036477234 | |
861 .word 1797285236 | |
862 .ident "GCC: (crosstool-NG linaro-1.13.1-4.7-2012.10-20121022 - Linaro
GCC 2012.10) 4.7.3 20121001 (prerelease)" | |
# Empty .note.GNU-stack section emitted by the compiler.
863 .section .note.GNU-stack,"",%progbits | |
OLD | NEW |