#if defined(__arm__)
#include <openssl/arm_arch.h>

#if __ARM_MAX_ARCH__>=7
.text
.arch armv7-a
.fpu neon
.code 32
.align 5
@ Constant table used by the key-expansion code below:
@   words 0-3:  rcon seed 0x01 (doubled each round via vshl.u8; the
@               0x1b row below is loaded for the final 128-bit rounds,
@               where rcon wraps past 0x80 in GF(2^8))
@   words 4-7:  vtbl byte-permutation mask ("rotate-n-splat") that
@               broadcasts the rotated last word of the previous key
.Lrcon:
.long 0x01,0x01,0x01,0x01
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d @ rotate-n-splat
.long 0x1b,0x1b,0x1b,0x1b
@-----------------------------------------------------------------------
@ int aes_v8_set_encrypt_key(const uint8_t *user_key, @ r0
@                            int bits,                @ r1 (128/192/256)
@                            AES_KEY *key)            @ r2
@ Expands |user_key| into the round-key schedule at |key| and stores the
@ round count (10/12/14) at byte offset 240 of the schedule.
@ Returns 0 on success, -1 for a NULL pointer, -2 for a bad bit count.
@ AESE is emitted as raw .byte sequences (decoded in the comments),
@ presumably so pre-crypto-extension assemblers still accept the file —
@ TODO confirm against the generating perlasm script.
@ The trick throughout: AESE with an all-zero round key (q0) computes
@ SubBytes(ShiftRows(x)), which together with the vtbl rotate mask (q2)
@ implements the RotWord/SubWord step of key expansion.
@-----------------------------------------------------------------------
.globl aes_v8_set_encrypt_key
.hidden aes_v8_set_encrypt_key
.type aes_v8_set_encrypt_key,%function
.align 5
aes_v8_set_encrypt_key:
.Lenc_key:
mov r3,#-1 @ default return: -1 (NULL argument)
cmp r0,#0
beq .Lenc_key_abort
cmp r2,#0
beq .Lenc_key_abort
mov r3,#-2 @ return -2 for an unsupported key size
cmp r1,#128
blt .Lenc_key_abort
cmp r1,#256
bgt .Lenc_key_abort
tst r1,#0x3f @ bits must be a multiple of 64
bne .Lenc_key_abort

adr r3,.Lrcon
cmp r1,#192

veor q0,q0,q0 @ q0 = 0: zero round key for the AESE/SubWord trick
vld1.8 {q3},[r0]! @ q3 = first 16 key bytes
mov r1,#8 @ reuse r1 as the expansion-round counter
vld1.32 {q1,q2},[r3]! @ q1 = rcon, q2 = rotate-n-splat mask

blt .Loop128 @ 128-bit key
beq .L192 @ 192-bit key
b .L256 @ 256-bit key

.align 4
.Loop128:
vtbl.8 d20,{q3},d4 @ q10 = last word of q3, rotated and splatted
vtbl.8 d21,{q3},d5
vext.8 q9,q0,q3,#12 @ q9 = q3 << 32 (bytes), for the word-chain XOR
vst1.32 {q3},[r2]! @ emit current round key
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
subs r1,r1,#1

veor q3,q3,q9 @ fold previous words into the new key...
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q10,q10,q1 @ ...add the round constant to SubWord(RotWord)
veor q3,q3,q9
vshl.u8 q1,q1,#1 @ rcon <<= 1 for the next round
veor q3,q3,q10
bne .Loop128

vld1.32 {q1},[r3] @ rcon wrapped past 0x80: load the 0x1b row

@ Two final (unrolled) 128-bit rounds, same recurrence as .Loop128.
vtbl.8 d20,{q3},d4
vtbl.8 d21,{q3},d5
vext.8 q9,q0,q3,#12
vst1.32 {q3},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0

veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q10,q10,q1
veor q3,q3,q9
vshl.u8 q1,q1,#1
veor q3,q3,q10

vtbl.8 d20,{q3},d4
vtbl.8 d21,{q3},d5
vext.8 q9,q0,q3,#12
vst1.32 {q3},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0

veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q10,q10,q1
veor q3,q3,q9
veor q3,q3,q10
vst1.32 {q3},[r2]
add r2,r2,#0x50 @ advance r2 to schedule+240 (rounds slot)

mov r12,#10 @ 10 rounds for AES-128
b .Ldone

.align 4
.L192:
vld1.8 {d16},[r0]! @ load the remaining 8 key bytes
vmov.i8 q10,#8 @ borrow q10
vst1.32 {q3},[r2]!
vsub.i8 q2,q2,q10 @ adjust the mask for the 6-word (192-bit) stride

.Loop192:
vtbl.8 d20,{q8},d4
vtbl.8 d21,{q8},d5
vext.8 q9,q0,q3,#12
vst1.32 {d16},[r2]! @ emit the odd half-key (64 bits)
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
subs r1,r1,#1

veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9

vdup.32 q9,d7[1] @ splat last word of q3 into the q8 chain
veor q9,q9,q8
veor q10,q10,q1
vext.8 q8,q0,q8,#12
vshl.u8 q1,q1,#1 @ next rcon
veor q8,q8,q9
veor q3,q3,q10
veor q8,q8,q10
vst1.32 {q3},[r2]!
bne .Loop192

mov r12,#12 @ 12 rounds for AES-192
add r2,r2,#0x20 @ advance r2 to the rounds slot (offset 240)
b .Ldone

.align 4
.L256:
vld1.8 {q8},[r0] @ load the second 16 key bytes
mov r1,#7 @ 7 expansion iterations
mov r12,#14 @ 14 rounds for AES-256
vst1.32 {q3},[r2]!

.Loop256:
vtbl.8 d20,{q8},d4
vtbl.8 d21,{q8},d5
vext.8 q9,q0,q3,#12
vst1.32 {q8},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
subs r1,r1,#1

veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q10,q10,q1
veor q3,q3,q9
vshl.u8 q1,q1,#1
veor q3,q3,q10
vst1.32 {q3},[r2]!
beq .Ldone

@ Odd 256-bit step: SubWord without RotWord or rcon (vdup keeps the
@ word unrotated; no q1 XOR), per the AES-256 schedule.
vdup.32 q10,d7[1]
vext.8 q9,q0,q8,#12
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0

veor q8,q8,q9
vext.8 q9,q0,q9,#12
veor q8,q8,q9
vext.8 q9,q0,q9,#12
veor q8,q8,q9

veor q8,q8,q10
b .Loop256

.Ldone:
str r12,[r2] @ store round count at schedule+240
mov r3,#0 @ success

.Lenc_key_abort:
mov r0,r3 @ return value

bx lr
.size aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key

@-----------------------------------------------------------------------
@ int aes_v8_set_decrypt_key(const uint8_t *user_key, @ r0
@                            int bits,                @ r1
@                            AES_KEY *key)            @ r2
@ Builds the decryption schedule: runs the encrypt-key expansion
@ (.Lenc_key), then walks the schedule from both ends, swapping round
@ keys and applying AESIMC (InvMixColumns) to every key except the
@ outermost pair, as required by the equivalent-inverse-cipher form.
@ Returns 0 on success, or the .Lenc_key error code (-1/-2).
@ Clobbers: r4 (saved/restored), q0, q1, r12 (rounds from .Lenc_key).
@-----------------------------------------------------------------------
.globl aes_v8_set_decrypt_key
.hidden aes_v8_set_decrypt_key
.type aes_v8_set_decrypt_key,%function
.align 5
aes_v8_set_decrypt_key:
stmdb sp!,{r4,lr}
bl .Lenc_key @ expand as for encryption; r12 = #rounds

cmp r0,#0 @ propagate expansion failure
bne .Ldec_key_abort

sub r2,r2,#240 @ restore original r2 (start of schedule)
mov r4,#-16 @ backwards post-increment for the top pointer
add r0,r2,r12,lsl#4 @ end of key schedule (last round key)

@ Swap first and last round keys (no AESIMC on the outer pair).
vld1.32 {q0},[r2]
vld1.32 {q1},[r0]
vst1.32 {q0},[r0],r4
vst1.32 {q1},[r2]!

.Loop_imc:
@ Swap the next pair inward, InvMixColumns-ing both.
vld1.32 {q0},[r2]
vld1.32 {q1},[r0]
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
vst1.32 {q0},[r0],r4
vst1.32 {q1},[r2]!
cmp r0,r2 @ until the pointers meet in the middle
bhi .Loop_imc

vld1.32 {q0},[r2] @ middle key: transform in place
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
vst1.32 {q0},[r0]

eor r0,r0,r0 @ return value 0
.Ldec_key_abort:
ldmia sp!,{r4,pc}
.size aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key
@-----------------------------------------------------------------------
@ void aes_v8_encrypt(const uint8_t *in,  @ r0: one 16-byte block
@                     uint8_t *out,       @ r1
@                     const AES_KEY *key) @ r2: schedule + rounds@240
@ Encrypts a single block with the hardware AESE/AESMC pipeline,
@ two rounds per loop iteration; the final round omits AESMC and is
@ completed by XOR with the last round key (standard AES last round).
@ Clobbers r3, q0, q1, q2.
@-----------------------------------------------------------------------
.globl aes_v8_encrypt
.hidden aes_v8_encrypt
.type aes_v8_encrypt,%function
.align 5
aes_v8_encrypt:
ldr r3,[r2,#240] @ r3 = number of rounds
vld1.32 {q0},[r2]! @ round key 0
vld1.8 {q2},[r0] @ q2 = plaintext block
sub r3,r3,#2 @ loop processes 2 rounds per iteration
vld1.32 {q1},[r2]! @ round key 1

.Loop_enc:
.byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
vld1.32 {q0},[r2]! @ pre-load next even round key
subs r3,r3,#2
.byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
vld1.32 {q1},[r2]! @ pre-load next odd round key
bgt .Loop_enc

.byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
vld1.32 {q0},[r2] @ last round key
.byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1
veor q2,q2,q0 @ final AddRoundKey (no MixColumns)

vst1.8 {q2},[r1]
bx lr
.size aes_v8_encrypt,.-aes_v8_encrypt
@-----------------------------------------------------------------------
@ void aes_v8_decrypt(const uint8_t *in,  @ r0: one 16-byte block
@                     uint8_t *out,       @ r1
@                     const AES_KEY *key) @ r2: DECRYPT schedule
@                                         @ (from aes_v8_set_decrypt_key)
@ Mirror of aes_v8_encrypt using AESD/AESIMC; the schedule must already
@ be in equivalent-inverse form. Clobbers r3, q0, q1, q2.
@-----------------------------------------------------------------------
.globl aes_v8_decrypt
.hidden aes_v8_decrypt
.type aes_v8_decrypt,%function
.align 5
aes_v8_decrypt:
ldr r3,[r2,#240] @ r3 = number of rounds
vld1.32 {q0},[r2]! @ round key 0
vld1.8 {q2},[r0] @ q2 = ciphertext block
sub r3,r3,#2 @ two rounds per iteration
vld1.32 {q1},[r2]! @ round key 1

.Loop_dec:
.byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
vld1.32 {q0},[r2]! @ pre-load next even round key
subs r3,r3,#2
.byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
vld1.32 {q1},[r2]! @ pre-load next odd round key
bgt .Loop_dec

.byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
vld1.32 {q0},[r2] @ last round key
.byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1
veor q2,q2,q0 @ final AddRoundKey (no InvMixColumns)

vst1.8 {q2},[r1]
bx lr
.size aes_v8_decrypt,.-aes_v8_decrypt
@-----------------------------------------------------------------------
@ void aes_v8_cbc_encrypt(const uint8_t *in,  @ r0
@                         uint8_t *out,       @ r1
@                         size_t len,         @ r2 (bytes; rounded down
@                                             @     to a multiple of 16)
@                         const AES_KEY *key, @ r3
@                         uint8_t ivec[16],   @ [sp]   -> r4
@                         int enc)            @ [sp+4] -> r5
@ CBC mode. Encryption is serial (each block depends on the previous
@ ciphertext) with dedicated fast paths for 128- and 192/256-bit keys;
@ decryption is parallel and processes 3 blocks per iteration, falling
@ back to a 1-2 block tail. The IV buffer is updated on exit.
@ Saves/restores r4-r8 and d8-d15 per AAPCS.
@-----------------------------------------------------------------------
.globl aes_v8_cbc_encrypt
.hidden aes_v8_cbc_encrypt
.type aes_v8_cbc_encrypt,%function
.align 5
aes_v8_cbc_encrypt:
mov ip,sp
stmdb sp!,{r4,r5,r6,r7,r8,lr}
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
ldmia ip,{r4,r5} @ load remaining args: r4 = ivec, r5 = enc flag
subs r2,r2,#16
mov r8,#16 @ r8 = input stride; zeroed on the last block
blo .Lcbc_abort
moveq r8,#0

cmp r5,#0 @ en- or decrypting?
ldr r5,[r3,#240] @ r5 = number of rounds
and r2,r2,#-16
vld1.8 {q6},[r4] @ q6 = IV
vld1.8 {q0},[r0],r8 @ first input block

vld1.32 {q8,q9},[r3] @ load key schedule...
sub r5,r5,#6
add r7,r3,r5,lsl#4 @ pointer to last 7 round keys
sub r5,r5,#2
vld1.32 {q10,q11},[r7]!
vld1.32 {q12,q13},[r7]!
vld1.32 {q14,q15},[r7]!
vld1.32 {q7},[r7] @ q7 = last round key

add r7,r3,#32
mov r6,r5
beq .Lcbc_dec

@ ---- CBC encrypt ----
cmp r5,#2 @ r5==2 <=> 10 rounds <=> AES-128
veor q0,q0,q6 @ block ^= IV
veor q5,q8,q7 @ q5 = rndkey[0]^rndkey[last], folded into next input
beq .Lcbc_enc128

@ 192/256-bit path: stash per-round key pointers in spare registers
@ so the loop body never reloads from a moving cursor.
vld1.32 {q2,q3},[r7]
add r7,r3,#16
add r6,r3,#16*4
add r12,r3,#16*5
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
add r14,r3,#16*6
add r3,r3,#16*7
b .Lenter_cbc_enc

.align 4
.Loop_cbc_enc:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vst1.8 {q6},[r1]! @ store previous ciphertext block
.Lenter_cbc_enc:
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q8},[r6]
cmp r5,#4 @ r5==4 <=> 12 rounds <=> AES-192
.byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q9},[r12]
beq .Lcbc_enc192

@ AES-256 only: two extra rounds
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q8},[r14]
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q9},[r3]
nop

.Lcbc_enc192:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
subs r2,r2,#16
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
moveq r8,#0 @ last block: stop advancing the input pointer
.byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.8 {q8},[r0],r8 @ prefetch next plaintext block
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
veor q8,q8,q5 @ next ^= rndkey[0]^rndkey[last] (pre-whitening)
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q9},[r7] @ re-pre-load rndkey[1]
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
veor q6,q0,q7 @ q6 = ciphertext = state ^ last key
bhs .Loop_cbc_enc

vst1.8 {q6},[r1]! @ flush the final ciphertext block
b .Lcbc_done

.align 5
.Lcbc_enc128:
@ AES-128 fast path: all round keys stay resident in q2,q3,q8-q15.
vld1.32 {q2,q3},[r7]
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
b .Lenter_cbc_enc128
.Loop_cbc_enc128:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vst1.8 {q6},[r1]!
.Lenter_cbc_enc128:
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
subs r2,r2,#16
.byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
moveq r8,#0
.byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.8 {q8},[r0],r8
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
veor q8,q8,q5
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
veor q6,q0,q7
bhs .Loop_cbc_enc128

vst1.8 {q6},[r1]!
b .Lcbc_done
.align 5
.Lcbc_dec:
@ ---- CBC decrypt: 3 blocks in flight (q0,q1,q10); q6 carries the
@ running IV (previous ciphertext); q2,q3,q11 hold raw ciphertext
@ for the post-decrypt XOR.
vld1.8 {q10},[r0]!
subs r2,r2,#32 @ bias
add r6,r5,#2
vorr q3,q0,q0
vorr q1,q0,q0
vorr q11,q10,q10
blo .Lcbc_dec_tail

vorr q1,q10,q10
vld1.8 {q10},[r0]!
vorr q2,q0,q0
vorr q3,q1,q1
vorr q11,q10,q10

.Loop3x_cbc_dec:
.byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q8},[r7]!
subs r6,r6,#2
.byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q9},[r7]!
bgt .Loop3x_cbc_dec

@ Last 7 rounds, interleaved with CBC chaining and next-trio loads.
.byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
veor q4,q6,q7 @ (IV ^ last key) for block 0
subs r2,r2,#0x30
veor q5,q2,q7 @ (prev ct ^ last key) for block 1
movlo r6,r2 @ r6 is zero at this point
.byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
veor q9,q3,q7 @ (prev ct ^ last key) for block 2
add r0,r0,r6 @ r0 is adjusted in such way that
@ at exit from the loop q1-q10
@ are loaded with last "words"
vorr q6,q11,q11 @ IV for next trio = last ciphertext seen
mov r7,r3
.byte 0x68,0x03,0xb0,0xf3 @ aesd q0,q12
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.8 {q2},[r0]!
.byte 0x6a,0x03,0xb0,0xf3 @ aesd q0,q13
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.8 {q3},[r0]!
.byte 0x6c,0x03,0xb0,0xf3 @ aesd q0,q14
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.8 {q11},[r0]!
.byte 0x6e,0x03,0xb0,0xf3 @ aesd q0,q15
.byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15
.byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15
vld1.32 {q8},[r7]! @ re-pre-load rndkey[0]
add r6,r5,#2
veor q4,q4,q0 @ plaintext = decrypted ^ (chain ^ last key)
veor q5,q5,q1
veor q10,q10,q9
vld1.32 {q9},[r7]! @ re-pre-load rndkey[1]
vst1.8 {q4},[r1]!
vorr q0,q2,q2
vst1.8 {q5},[r1]!
vorr q1,q3,q3
vst1.8 {q10},[r1]!
vorr q10,q11,q11
bhs .Loop3x_cbc_dec

cmn r2,#0x30 @ exact multiple of 3 blocks?
beq .Lcbc_done
nop

.Lcbc_dec_tail:
@ 1- or 2-block tail: q1 and q10 are decrypted; q1's result is
@ discarded in the 1-block case (the beq below).
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q8},[r7]!
subs r6,r6,#2
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q9},[r7]!
bgt .Lcbc_dec_tail

.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
.byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
cmn r2,#0x20 @ one block left, or two?
.byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
veor q5,q6,q7
.byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
veor q9,q3,q7
.byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15
.byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15
beq .Lcbc_dec_one
veor q5,q5,q1
veor q9,q9,q10
vorr q6,q11,q11 @ next IV = last ciphertext
vst1.8 {q5},[r1]!
vst1.8 {q9},[r1]!
b .Lcbc_done

.Lcbc_dec_one:
veor q5,q5,q10
vorr q6,q11,q11
vst1.8 {q5},[r1]!

.Lcbc_done:
vst1.8 {q6},[r4] @ write back the updated IV
.Lcbc_abort:
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
ldmia sp!,{r4,r5,r6,r7,r8,pc}
.size aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt
@-----------------------------------------------------------------------
@ void aes_v8_ctr32_encrypt_blocks(const uint8_t *in,   @ r0
@                                  uint8_t *out,        @ r1
@                                  size_t blocks,       @ r2 (16-byte
@                                                       @  blocks, not
@                                                       @  bytes)
@                                  const AES_KEY *key,  @ r3
@                                  const uint8_t ivec[16]) @ [sp] -> r4
@ CTR mode with a 32-bit big-endian counter in the last word of ivec.
@ Keeps three counter blocks in flight (q0,q1,q10); the counter is
@ maintained host-endian in r8 (byte-reversed on little-endian via rev)
@ and re-inserted with vmov.32 d{1,3,21}[1]. NOTE(review): the 32-bit
@ counter additions in r8/r9/r10 appear to wrap without carrying into
@ the rest of the IV — standard for CTR32; confirm callers rely on that.
@ Saves/restores r4-r10 and d8-d15 per AAPCS.
@-----------------------------------------------------------------------
.globl aes_v8_ctr32_encrypt_blocks
.hidden aes_v8_ctr32_encrypt_blocks
.type aes_v8_ctr32_encrypt_blocks,%function
.align 5
aes_v8_ctr32_encrypt_blocks:
mov ip,sp
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
ldr r4, [ip] @ load remaining arg: r4 = ivec
ldr r5,[r3,#240] @ r5 = number of rounds

ldr r8, [r4, #12] @ r8 = counter word (big-endian in memory)
vld1.32 {q0},[r4] @ q0 = initial counter block

vld1.32 {q8,q9},[r3] @ load key schedule...
sub r5,r5,#4
mov r12,#16 @ input stride for the tail; 0 if only 1 block
cmp r2,#2
add r7,r3,r5,lsl#4 @ pointer to last 5 round keys
sub r5,r5,#2
vld1.32 {q12,q13},[r7]!
vld1.32 {q14,q15},[r7]!
vld1.32 {q7},[r7] @ q7 = last round key
add r7,r3,#32
mov r6,r5
movlo r12,#0
#ifndef __ARMEB__
rev r8, r8 @ counter to host (little-endian) order
#endif
vorr q1,q0,q0
add r10, r8, #1 @ counter for block 1
vorr q10,q0,q0
add r8, r8, #2 @ counter for block 2
vorr q6,q0,q0 @ q6 = counter-block template
rev r10, r10
vmov.32 d3[1],r10 @ patch counter into q1
bls .Lctr32_tail @ <=2 blocks: no 3x main loop
rev r12, r8
sub r2,r2,#3 @ bias
vmov.32 d21[1],r12 @ patch counter into q10
b .Loop3x_ctr32

.align 4
.Loop3x_ctr32:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
.byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
vld1.32 {q8},[r7]!
subs r6,r6,#2
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
.byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
vld1.32 {q9},[r7]!
bgt .Loop3x_ctr32

@ Final rounds: keystream moves to q4,q5,q9 so q0,q1,q10 can be
@ re-seeded with the next three counter blocks in parallel.
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x83,0xb0,0xf3 @ aesmc q4,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0xa3,0xb0,0xf3 @ aesmc q5,q1
vld1.8 {q2},[r0]! @ input block 0
vorr q0,q6,q6 @ reset q0 from counter template
.byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
vld1.8 {q3},[r0]! @ input block 1
vorr q1,q6,q6
.byte 0x22,0x83,0xb0,0xf3 @ aese q4,q9
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x22,0xa3,0xb0,0xf3 @ aese q5,q9
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
vld1.8 {q11},[r0]! @ input block 2
mov r7,r3
.byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9
.byte 0xa4,0x23,0xf0,0xf3 @ aesmc q9,q10
vorr q10,q6,q6
add r9,r8,#1 @ next three counter values
.byte 0x28,0x83,0xb0,0xf3 @ aese q4,q12
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x28,0xa3,0xb0,0xf3 @ aese q5,q12
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
veor q2,q2,q7 @ fold last round key into the input
add r10,r8,#2
.byte 0x28,0x23,0xf0,0xf3 @ aese q9,q12
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
veor q3,q3,q7
add r8,r8,#3
.byte 0x2a,0x83,0xb0,0xf3 @ aese q4,q13
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x2a,0xa3,0xb0,0xf3 @ aese q5,q13
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
veor q11,q11,q7
rev r9,r9 @ back to big-endian for the block
.byte 0x2a,0x23,0xf0,0xf3 @ aese q9,q13
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
vmov.32 d1[1], r9 @ patch counter into q0
rev r10,r10
.byte 0x2c,0x83,0xb0,0xf3 @ aese q4,q14
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x2c,0xa3,0xb0,0xf3 @ aese q5,q14
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
vmov.32 d3[1], r10 @ patch counter into q1
rev r12,r8
.byte 0x2c,0x23,0xf0,0xf3 @ aese q9,q14
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
vmov.32 d21[1], r12 @ patch counter into q10
subs r2,r2,#3
.byte 0x2e,0x83,0xb0,0xf3 @ aese q4,q15
.byte 0x2e,0xa3,0xb0,0xf3 @ aese q5,q15
.byte 0x2e,0x23,0xf0,0xf3 @ aese q9,q15

veor q2,q2,q4 @ ciphertext = input ^ keystream
vld1.32 {q8},[r7]! @ re-pre-load rndkey[0]
vst1.8 {q2},[r1]!
veor q3,q3,q5
mov r6,r5
vst1.8 {q3},[r1]!
veor q11,q11,q9
vld1.32 {q9},[r7]! @ re-pre-load rndkey[1]
vst1.8 {q11},[r1]!
bhs .Loop3x_ctr32

adds r2,r2,#3 @ undo bias; 0..2 blocks remain
beq .Lctr32_done
cmp r2,#1
mov r12,#16
moveq r12,#0 @ single block: don't advance past it

.Lctr32_tail:
@ 1- or 2-block tail on q0 (and q1, whose output is dropped when
@ only one block remains).
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
vld1.32 {q8},[r7]!
subs r6,r6,#2
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
vld1.32 {q9},[r7]!
bgt .Lctr32_tail

.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
vld1.8 {q2},[r0],r12 @ input block 0 (r12=0 if it is the only one)
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x28,0x23,0xb0,0xf3 @ aese q1,q12
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
vld1.8 {q3},[r0] @ input block 1 (may alias block 0; unused then)
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2a,0x23,0xb0,0xf3 @ aese q1,q13
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
veor q2,q2,q7 @ fold last round key into the input
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2c,0x23,0xb0,0xf3 @ aese q1,q14
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
veor q3,q3,q7
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
.byte 0x2e,0x23,0xb0,0xf3 @ aese q1,q15

cmp r2,#1
veor q2,q2,q0
veor q3,q3,q1
vst1.8 {q2},[r1]!
beq .Lctr32_done
vst1.8 {q3},[r1]

.Lctr32_done:
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
.size aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks
#endif
#endif