OLD | NEW |
| (Empty) |
1 #if defined(__aarch64__) | |
2 #include <openssl/arm_arch.h> | |
3 | |
4 #if __ARM_MAX_ARCH__>=7 | |
5 .text | |
6 #if !defined(__clang__) | |
7 .arch armv8-a+crypto | |
8 #endif | |
9 .align 5 | |
// Constant table read by the key-expansion code at .Lenc_key. Three rows of
// four 32-bit words each:
//   row 0 (0x01): loaded into v1 and XORed into each new round key; the code
//                 shifts v1 left by one bit after every use.
//   row 1:        byte-index vector loaded into v2 and used with tbl.
//   row 2 (0x1b): loaded into v1 by the 128-bit path once its eight loop
//                 iterations have finished.
10 .Lrcon: | |
11 .long 0x01,0x01,0x01,0x01 | |
12 .long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat | |
13 .long 0x1b,0x1b,0x1b,0x1b | |
14 | |
// aes_v8_set_encrypt_key: expand a user key into an encryption key schedule.
//
// Inputs, as read by the code below:
//   x0 = pointer to the user key bytes
//   w1 = key length in bits
//   x2 = pointer to the output key schedule
// Returns in x0:
//    0  on success
//   -1  if x0 or x2 is zero
//   -2  if w1 is below 128, above 256, or not a multiple of 64
// On success the round keys are written consecutively starting at x2 and the
// round count (10, 12 or 14) is stored as a 32-bit word 240 bytes past the
// start of the schedule.
// .Lenc_key is a second entry label used by aes_v8_set_decrypt_key, which
// relies on the values left in x2 and x12 by the .Ldone path.
15 .globl aes_v8_set_encrypt_key | |
16 .hidden aes_v8_set_encrypt_key | |
17 .type aes_v8_set_encrypt_key,%function | |
18 .align 5 | |
19 aes_v8_set_encrypt_key: | |
20 .Lenc_key: | |
21 stp x29,x30,[sp,#-16]! | |
22 add x29,sp,#0 | |
// x3 carries the status that .Lenc_key_abort copies into x0.
23 mov x3,#-1 | |
24 cmp x0,#0 | |
25 b.eq .Lenc_key_abort | |
26 cmp x2,#0 | |
27 b.eq .Lenc_key_abort | |
28 mov x3,#-2 | |
29 cmp w1,#128 | |
30 b.lt .Lenc_key_abort | |
31 cmp w1,#256 | |
32 b.gt .Lenc_key_abort | |
33 tst w1,#0x3f | |
34 b.ne .Lenc_key_abort | |
35 | |
// Arguments accepted. x3 is reused as the constant-table pointer. The flags
// set by the cmp against 192 are consumed by the b.lt/b.eq further down; none
// of the instructions in between writes the flags.
36 adr x3,.Lrcon | |
37 cmp w1,#192 | |
38 | |
// v0 stays all-zero for the rest of the routine. v3 = first 16 key bytes.
39 eor v0.16b,v0.16b,v0.16b | |
40 ld1 {v3.16b},[x0],#16 | |
41 mov w1,#8 // reuse w1 | |
// v1 = row 0 of .Lrcon, v2 = row 1 (tbl index vector).
42 ld1 {v1.4s,v2.4s},[x3],#32 | |
43 | |
44 b.lt .Loop128 | |
45 b.eq .L192 | |
46 b .L256 | |
47 | |
// 128-bit path. Each pass stores the current round key from v3 and derives
// the next one: v6 = last word of v3 rotated and splatted (tbl), run through
// aese with the zero key in v0, then XORed with v1. The ext/eor chain XORs
// v3 with copies of itself moved up by one, two and three 32-bit lanes.
48 .align 4 | |
49 .Loop128: | |
50 tbl v6.16b,{v3.16b},v2.16b | |
51 ext v5.16b,v0.16b,v3.16b,#12 | |
52 st1 {v3.4s},[x2],#16 | |
53 aese v6.16b,v0.16b | |
54 subs w1,w1,#1 | |
55 | |
56 eor v3.16b,v3.16b,v5.16b | |
57 ext v5.16b,v0.16b,v5.16b,#12 | |
58 eor v3.16b,v3.16b,v5.16b | |
59 ext v5.16b,v0.16b,v5.16b,#12 | |
60 eor v6.16b,v6.16b,v1.16b | |
61 eor v3.16b,v3.16b,v5.16b | |
62 shl v1.16b,v1.16b,#1 | |
63 eor v3.16b,v3.16b,v6.16b | |
64 b.ne .Loop128 | |
65 | |
// Eight keys stored so far. Switch v1 to row 2 of .Lrcon (0x1b) and run the
// same step twice more, unrolled.
66 ld1 {v1.4s},[x3] | |
67 | |
68 tbl v6.16b,{v3.16b},v2.16b | |
69 ext v5.16b,v0.16b,v3.16b,#12 | |
70 st1 {v3.4s},[x2],#16 | |
71 aese v6.16b,v0.16b | |
72 | |
73 eor v3.16b,v3.16b,v5.16b | |
74 ext v5.16b,v0.16b,v5.16b,#12 | |
75 eor v3.16b,v3.16b,v5.16b | |
76 ext v5.16b,v0.16b,v5.16b,#12 | |
77 eor v6.16b,v6.16b,v1.16b | |
78 eor v3.16b,v3.16b,v5.16b | |
79 shl v1.16b,v1.16b,#1 | |
80 eor v3.16b,v3.16b,v6.16b | |
81 | |
82 tbl v6.16b,{v3.16b},v2.16b | |
83 ext v5.16b,v0.16b,v3.16b,#12 | |
84 st1 {v3.4s},[x2],#16 | |
85 aese v6.16b,v0.16b | |
86 | |
87 eor v3.16b,v3.16b,v5.16b | |
88 ext v5.16b,v0.16b,v5.16b,#12 | |
89 eor v3.16b,v3.16b,v5.16b | |
90 ext v5.16b,v0.16b,v5.16b,#12 | |
91 eor v6.16b,v6.16b,v1.16b | |
92 eor v3.16b,v3.16b,v5.16b | |
93 eor v3.16b,v3.16b,v6.16b | |
// Final (11th) key is stored without post-increment at offset 160; adding
// 0x50 leaves x2 at schedule start + 240 for the str at .Ldone.
94 st1 {v3.4s},[x2] | |
95 add x2,x2,#0x50 | |
96 | |
97 mov w12,#10 | |
98 b .Ldone | |
99 | |
// 192-bit path. v4 receives the remaining 8 key bytes. Subtracting 8 from
// every index byte in v2 makes tbl pick the last word of those 8 bytes.
// Each pass writes 8 bytes from v4 and then 16 bytes from v3 (24 bytes).
100 .align 4 | |
101 .L192: | |
102 ld1 {v4.8b},[x0],#8 | |
103 movi v6.16b,#8 // borrow v6.16b | |
104 st1 {v3.4s},[x2],#16 | |
105 sub v2.16b,v2.16b,v6.16b // adjust the mask | |
106 | |
107 .Loop192: | |
108 tbl v6.16b,{v4.16b},v2.16b | |
109 ext v5.16b,v0.16b,v3.16b,#12 | |
110 st1 {v4.8b},[x2],#8 | |
111 aese v6.16b,v0.16b | |
112 subs w1,w1,#1 | |
113 | |
114 eor v3.16b,v3.16b,v5.16b | |
115 ext v5.16b,v0.16b,v5.16b,#12 | |
116 eor v3.16b,v3.16b,v5.16b | |
117 ext v5.16b,v0.16b,v5.16b,#12 | |
118 eor v3.16b,v3.16b,v5.16b | |
119 | |
120 dup v5.4s,v3.s[3] | |
121 eor v5.16b,v5.16b,v4.16b | |
122 eor v6.16b,v6.16b,v1.16b | |
123 ext v4.16b,v0.16b,v4.16b,#12 | |
124 shl v1.16b,v1.16b,#1 | |
125 eor v4.16b,v4.16b,v5.16b | |
126 eor v3.16b,v3.16b,v6.16b | |
127 eor v4.16b,v4.16b,v6.16b | |
128 st1 {v3.4s},[x2],#16 | |
129 b.ne .Loop192 | |
130 | |
// 16 + 8*24 = 208 bytes written; +0x20 leaves x2 at schedule start + 240.
131 mov w12,#12 | |
132 add x2,x2,#0x20 | |
133 b .Ldone | |
134 | |
// 256-bit path. v4 receives the second 16 key bytes. The loop counter is 7
// and each full pass stores v4 then the new v3; the loop exits right after
// the store of v3 on the pass where the counter reaches zero, at which point
// x2 is already at schedule start + 240.
135 .align 4 | |
136 .L256: | |
137 ld1 {v4.16b},[x0] | |
138 mov w1,#7 | |
139 mov w12,#14 | |
140 st1 {v3.4s},[x2],#16 | |
141 | |
142 .Loop256: | |
143 tbl v6.16b,{v4.16b},v2.16b | |
144 ext v5.16b,v0.16b,v3.16b,#12 | |
145 st1 {v4.4s},[x2],#16 | |
146 aese v6.16b,v0.16b | |
147 subs w1,w1,#1 | |
148 | |
149 eor v3.16b,v3.16b,v5.16b | |
150 ext v5.16b,v0.16b,v5.16b,#12 | |
151 eor v3.16b,v3.16b,v5.16b | |
152 ext v5.16b,v0.16b,v5.16b,#12 | |
153 eor v6.16b,v6.16b,v1.16b | |
154 eor v3.16b,v3.16b,v5.16b | |
155 shl v1.16b,v1.16b,#1 | |
156 eor v3.16b,v3.16b,v6.16b | |
157 st1 {v3.4s},[x2],#16 | |
158 b.eq .Ldone | |
159 | |
// Second half of the pass updates v4: the last word of v3 is splatted with
// no rotation, and v1 is not XORed in here.
160 dup v6.4s,v3.s[3] // just splat | |
161 ext v5.16b,v0.16b,v4.16b,#12 | |
162 aese v6.16b,v0.16b | |
163 | |
164 eor v4.16b,v4.16b,v5.16b | |
165 ext v5.16b,v0.16b,v5.16b,#12 | |
166 eor v4.16b,v4.16b,v5.16b | |
167 ext v5.16b,v0.16b,v5.16b,#12 | |
168 eor v4.16b,v4.16b,v5.16b | |
169 | |
170 eor v4.16b,v4.16b,v6.16b | |
171 b .Loop256 | |
172 | |
// All three paths arrive with x2 = schedule start + 240 and w12 = round count.
173 .Ldone: | |
174 str w12,[x2] | |
175 mov x3,#0 | |
176 | |
177 .Lenc_key_abort: | |
178 mov x0,x3 // return value | |
// Only x29 is reloaded: this routine contains no call, so x30 still holds the
// return address. The post-index also releases the 16-byte frame.
179 ldr x29,[sp],#16 | |
180 ret | |
181 .size aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key | |
182 | |
// aes_v8_set_decrypt_key: build a decryption key schedule.
//
// Takes the same x0/w1/x2 arguments as aes_v8_set_encrypt_key; they are
// passed through untouched to .Lenc_key. A non-zero status from the
// expansion is returned unchanged. Otherwise the round keys are reversed in
// place, aesimc is applied to every key except the first and the last, and
// 0 is returned in x0.
183 .globl aes_v8_set_decrypt_key | |
184 .hidden aes_v8_set_decrypt_key | |
185 .type aes_v8_set_decrypt_key,%function | |
186 .align 5 | |
187 aes_v8_set_decrypt_key: | |
188 stp x29,x30,[sp,#-16]! | |
189 add x29,sp,#0 | |
190 bl .Lenc_key | |
191 | |
192 cmp x0,#0 | |
193 b.ne .Ldec_key_abort | |
194 | |
// On success .Lenc_key returns with x2 = schedule start + 240 and x12 = round
// count. x4 = -16 is the post-index step used to walk x0 downwards.
195 sub x2,x2,#240 // restore original x2 | |
196 mov x4,#-16 | |
197 add x0,x2,x12,lsl#4 // end of key schedule | |
198 | |
// Swap the first and last round keys as they are (no aesimc).
199 ld1 {v0.4s},[x2] | |
200 ld1 {v1.4s},[x0] | |
201 st1 {v0.4s},[x0],x4 | |
202 st1 {v1.4s},[x2],#16 | |
203 | |
// Swap the remaining pairs from the outside in, applying aesimc to both,
// until the two pointers meet.
204 .Loop_imc: | |
205 ld1 {v0.4s},[x2] | |
206 ld1 {v1.4s},[x0] | |
207 aesimc v0.16b,v0.16b | |
208 aesimc v1.16b,v1.16b | |
209 st1 {v0.4s},[x0],x4 | |
210 st1 {v1.4s},[x2],#16 | |
211 cmp x0,x2 | |
212 b.hi .Loop_imc | |
213 | |
// x0 and x2 now address the same middle key; transform it in place.
214 ld1 {v0.4s},[x2] | |
215 aesimc v0.16b,v0.16b | |
216 st1 {v0.4s},[x0] | |
217 | |
218 eor x0,x0,x0 // return value | |
219 .Ldec_key_abort: | |
// x30 was overwritten by the bl above, so both x29 and x30 are restored.
220 ldp x29,x30,[sp],#16 | |
221 ret | |
222 .size aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key | |
// aes_v8_encrypt: encrypt a single 16-byte block.
//   x0 = pointer to the input block
//   x1 = pointer to the output block
//   x2 = pointer to the key schedule (round count read from offset 240)
// Uses v0-v2 and w3 as scratch, advances x2 through the schedule, and does
// not touch the stack.
223 .globl aes_v8_encrypt | |
224 .hidden aes_v8_encrypt | |
225 .type aes_v8_encrypt,%function | |
226 .align 5 | |
227 aes_v8_encrypt: | |
228 ldr w3,[x2,#240] | |
229 ld1 {v0.4s},[x2],#16 | |
230 ld1 {v2.16b},[x0] | |
// The loop handles two rounds per pass; the last two rounds are done after
// it, so the counter starts at rounds-2.
231 sub w3,w3,#2 | |
232 ld1 {v1.4s},[x2],#16 | |
233 | |
// v0 and v1 alternate as the current round key; each is reloaded from the
// schedule right after it has been used.
234 .Loop_enc: | |
235 aese v2.16b,v0.16b | |
236 aesmc v2.16b,v2.16b | |
237 ld1 {v0.4s},[x2],#16 | |
238 subs w3,w3,#2 | |
239 aese v2.16b,v1.16b | |
240 aesmc v2.16b,v2.16b | |
241 ld1 {v1.4s},[x2],#16 | |
242 b.gt .Loop_enc | |
243 | |
// Last two rounds: the final aese is not followed by aesmc, and the last
// round key is applied with a plain eor.
244 aese v2.16b,v0.16b | |
245 aesmc v2.16b,v2.16b | |
246 ld1 {v0.4s},[x2] | |
247 aese v2.16b,v1.16b | |
248 eor v2.16b,v2.16b,v0.16b | |
249 | |
250 st1 {v2.16b},[x1] | |
251 ret | |
252 .size aes_v8_encrypt,.-aes_v8_encrypt | |
// aes_v8_decrypt: decrypt a single 16-byte block.
//   x0 = pointer to the input block
//   x1 = pointer to the output block
//   x2 = pointer to the key schedule (round count read from offset 240)
// Same structure as aes_v8_encrypt with aesd/aesimc in place of aese/aesmc.
// Uses v0-v2 and w3 as scratch, advances x2, and does not touch the stack.
253 .globl aes_v8_decrypt | |
254 .hidden aes_v8_decrypt | |
255 .type aes_v8_decrypt,%function | |
256 .align 5 | |
257 aes_v8_decrypt: | |
258 ldr w3,[x2,#240] | |
259 ld1 {v0.4s},[x2],#16 | |
260 ld1 {v2.16b},[x0] | |
// Two rounds per loop pass; the last two rounds are done after the loop.
261 sub w3,w3,#2 | |
262 ld1 {v1.4s},[x2],#16 | |
263 | |
264 .Loop_dec: | |
265 aesd v2.16b,v0.16b | |
266 aesimc v2.16b,v2.16b | |
267 ld1 {v0.4s},[x2],#16 | |
268 subs w3,w3,#2 | |
269 aesd v2.16b,v1.16b | |
270 aesimc v2.16b,v2.16b | |
271 ld1 {v1.4s},[x2],#16 | |
272 b.gt .Loop_dec | |
273 | |
// The final aesd is not followed by aesimc; the last round key is applied
// with a plain eor.
274 aesd v2.16b,v0.16b | |
275 aesimc v2.16b,v2.16b | |
276 ld1 {v0.4s},[x2] | |
277 aesd v2.16b,v1.16b | |
278 eor v2.16b,v2.16b,v0.16b | |
279 | |
280 st1 {v2.16b},[x1] | |
281 ret | |
282 .size aes_v8_decrypt,.-aes_v8_decrypt | |
// aes_v8_cbc_encrypt: CBC-mode encryption or decryption.
//   x0 = input pointer
//   x1 = output pointer
//   x2 = length in bytes; the routine returns without doing anything if it
//        is below 16. After the first block is accounted for, the remaining
//        count is masked down to a multiple of 16.
//        NOTE(review): presumably callers always pass whole blocks - confirm.
//   x3 = key schedule pointer (round count read from offset 240)
//   x4 = pointer to the 16-byte IV; read on entry, overwritten at .Lcbc_done
//   w5 = zero selects decryption, non-zero selects encryption
283 .globl aes_v8_cbc_encrypt | |
284 .hidden aes_v8_cbc_encrypt | |
285 .type aes_v8_cbc_encrypt,%function | |
286 .align 5 | |
287 aes_v8_cbc_encrypt: | |
288 stp x29,x30,[sp,#-16]! | |
289 add x29,sp,#0 | |
290 subs x2,x2,#16 | |
291 mov x8,#16 | |
292 b.lo .Lcbc_abort | |
// x8 is the post-index step for input loads: 0 when this is the last block,
// so the pointer is not advanced beyond it.
293 csel x8,xzr,x8,eq | |
294 | |
// The flags from this cmp are consumed by the b.eq .Lcbc_dec below; none of
// the instructions in between writes the flags.
295 cmp w5,#0 // en- or decrypting? | |
296 ldr w5,[x3,#240] | |
297 and x2,x2,#-16 | |
// v6 = IV, v0 = first input block.
298 ld1 {v6.16b},[x4] | |
299 ld1 {v0.16b},[x0],x8 | |
300 | |
301 ld1 {v16.4s,v17.4s},[x3] // load key schedule... | |
302 sub w5,w5,#6 | |
303 add x7,x3,x5,lsl#4 // pointer to last 7 round keys | |
// w5 = rounds-8 from here on.
304 sub w5,w5,#2 | |
// v18-v23 and v7 = the last seven round keys, v7 being the final one.
305 ld1 {v18.4s,v19.4s},[x7],#32 | |
306 ld1 {v20.4s,v21.4s},[x7],#32 | |
307 ld1 {v22.4s,v23.4s},[x7],#32 | |
308 ld1 {v7.4s},[x7] | |
309 | |
// x7 now addresses the third round key.
310 add x7,x3,#32 | |
311 mov w6,w5 | |
312 b.eq .Lcbc_dec | |
313 | |
// Encryption. rounds-8 == 2 selects the dedicated 10-round loop.
// v0 = first block XOR IV. v5 = first round key XOR last round key; it is
// XORed into each following input block so that the aese at the top of the
// loop applies the chaining XOR and the first round key in one step.
314 cmp w5,#2 | |
315 eor v0.16b,v0.16b,v6.16b | |
316 eor v5.16b,v16.16b,v7.16b | |
317 b.eq .Lcbc_enc128 | |
318 | |
// 12- and 14-round setup: v2/v3 = third and fourth round keys; x7, x6, x12,
// x14 and x3 are set to the addresses of the 2nd, 5th, 6th, 7th and 8th
// round keys for reloading inside the loop. x3 no longer points at the
// start of the schedule after this.
319 ld1 {v2.4s,v3.4s},[x7] | |
320 add x7,x3,#16 | |
321 add x6,x3,#16*4 | |
322 add x12,x3,#16*5 | |
323 aese v0.16b,v16.16b | |
324 aesmc v0.16b,v0.16b | |
325 add x14,x3,#16*6 | |
326 add x3,x3,#16*7 | |
327 b .Lenter_cbc_enc | |
328 | |
329 .align 4 | |
330 .Loop_cbc_enc: | |
// v16 here is the next input block XOR v5; v6 is the finished output block
// of the previous pass.
331 aese v0.16b,v16.16b | |
332 aesmc v0.16b,v0.16b | |
333 st1 {v6.16b},[x1],#16 | |
334 .Lenter_cbc_enc: | |
335 aese v0.16b,v17.16b | |
336 aesmc v0.16b,v0.16b | |
337 aese v0.16b,v2.16b | |
338 aesmc v0.16b,v0.16b | |
339 ld1 {v16.4s},[x6] | |
// rounds-8 == 4 (12 rounds) skips the two extra rounds below.
340 cmp w5,#4 | |
341 aese v0.16b,v3.16b | |
342 aesmc v0.16b,v0.16b | |
343 ld1 {v17.4s},[x12] | |
344 b.eq .Lcbc_enc192 | |
345 | |
346 aese v0.16b,v16.16b | |
347 aesmc v0.16b,v0.16b | |
348 ld1 {v16.4s},[x14] | |
349 aese v0.16b,v17.16b | |
350 aesmc v0.16b,v0.16b | |
351 ld1 {v17.4s},[x3] | |
352 nop | |
353 | |
354 .Lcbc_enc192: | |
355 aese v0.16b,v16.16b | |
356 aesmc v0.16b,v0.16b | |
// These flags drive both the csel below and the b.hs at the bottom of the
// loop.
357 subs x2,x2,#16 | |
358 aese v0.16b,v17.16b | |
359 aesmc v0.16b,v0.16b | |
360 csel x8,xzr,x8,eq | |
361 aese v0.16b,v18.16b | |
362 aesmc v0.16b,v0.16b | |
363 aese v0.16b,v19.16b | |
364 aesmc v0.16b,v0.16b | |
// Fetch the next input block into v16 (no longer needed as a round key in
// this pass).
365 ld1 {v16.16b},[x0],x8 | |
366 aese v0.16b,v20.16b | |
367 aesmc v0.16b,v0.16b | |
368 eor v16.16b,v16.16b,v5.16b | |
369 aese v0.16b,v21.16b | |
370 aesmc v0.16b,v0.16b | |
371 ld1 {v17.4s},[x7] // re-pre-load rndkey[1] | |
372 aese v0.16b,v22.16b | |
373 aesmc v0.16b,v0.16b | |
374 aese v0.16b,v23.16b | |
// v6 = finished output block; v0 keeps the state without the last round key
// for the next pass.
375 eor v6.16b,v0.16b,v7.16b | |
376 b.hs .Loop_cbc_enc | |
377 | |
378 st1 {v6.16b},[x1],#16 | |
379 b .Lcbc_done | |
380 | |
// 10-round encryption loop: every round key stays in registers
// (v16, v17, v2, v3, v18-v23, v7), so no key reloads are needed inside it.
381 .align 5 | |
382 .Lcbc_enc128: | |
383 ld1 {v2.4s,v3.4s},[x7] | |
384 aese v0.16b,v16.16b | |
385 aesmc v0.16b,v0.16b | |
386 b .Lenter_cbc_enc128 | |
387 .Loop_cbc_enc128: | |
388 aese v0.16b,v16.16b | |
389 aesmc v0.16b,v0.16b | |
390 st1 {v6.16b},[x1],#16 | |
391 .Lenter_cbc_enc128: | |
392 aese v0.16b,v17.16b | |
393 aesmc v0.16b,v0.16b | |
394 subs x2,x2,#16 | |
395 aese v0.16b,v2.16b | |
396 aesmc v0.16b,v0.16b | |
397 csel x8,xzr,x8,eq | |
398 aese v0.16b,v3.16b | |
399 aesmc v0.16b,v0.16b | |
400 aese v0.16b,v18.16b | |
401 aesmc v0.16b,v0.16b | |
402 aese v0.16b,v19.16b | |
403 aesmc v0.16b,v0.16b | |
404 ld1 {v16.16b},[x0],x8 | |
405 aese v0.16b,v20.16b | |
406 aesmc v0.16b,v0.16b | |
407 aese v0.16b,v21.16b | |
408 aesmc v0.16b,v0.16b | |
409 aese v0.16b,v22.16b | |
410 aesmc v0.16b,v0.16b | |
411 eor v16.16b,v16.16b,v5.16b | |
412 aese v0.16b,v23.16b | |
413 eor v6.16b,v0.16b,v7.16b | |
414 b.hs .Loop_cbc_enc128 | |
415 | |
416 st1 {v6.16b},[x1],#16 | |
417 b .Lcbc_done | |
// Decryption. v18 and v19 are reused for data from here on; the round keys
// they held are re-read through the x7 stream inside the loops. v6 always
// holds the chaining value (IV, then the most recent input block).
418 .align 5 | |
419 .Lcbc_dec: | |
420 ld1 {v18.16b},[x0],#16 | |
// After this bias x2 is negative when fewer than three blocks are available.
421 subs x2,x2,#32 // bias | |
// w6 = rounds-6 is the counter for the two-rounds-per-pass inner loops.
422 add w6,w5,#2 | |
423 orr v3.16b,v0.16b,v0.16b | |
424 orr v1.16b,v0.16b,v0.16b | |
425 orr v19.16b,v18.16b,v18.16b | |
426 b.lo .Lcbc_dec_tail | |
427 | |
// Three or more blocks: v0, v1, v18 are the working states and v2, v3, v19
// keep untouched copies of the same input blocks for the chaining XOR.
428 orr v1.16b,v18.16b,v18.16b | |
429 ld1 {v18.16b},[x0],#16 | |
430 orr v2.16b,v0.16b,v0.16b | |
431 orr v3.16b,v1.16b,v1.16b | |
432 orr v19.16b,v18.16b,v18.16b | |
433 | |
434 .Loop3x_cbc_dec: | |
435 aesd v0.16b,v16.16b | |
436 aesimc v0.16b,v0.16b | |
437 aesd v1.16b,v16.16b | |
438 aesimc v1.16b,v1.16b | |
439 aesd v18.16b,v16.16b | |
440 aesimc v18.16b,v18.16b | |
441 ld1 {v16.4s},[x7],#16 | |
442 subs w6,w6,#2 | |
443 aesd v0.16b,v17.16b | |
444 aesimc v0.16b,v0.16b | |
445 aesd v1.16b,v17.16b | |
446 aesimc v1.16b,v1.16b | |
447 aesd v18.16b,v17.16b | |
448 aesimc v18.16b,v18.16b | |
449 ld1 {v17.4s},[x7],#16 | |
450 b.gt .Loop3x_cbc_dec | |
451 | |
452 aesd v0.16b,v16.16b | |
453 aesimc v0.16b,v0.16b | |
454 aesd v1.16b,v16.16b | |
455 aesimc v1.16b,v1.16b | |
456 aesd v18.16b,v16.16b | |
457 aesimc v18.16b,v18.16b | |
// v4, v5 (and v17 below) = chaining value XOR last round key for each of the
// three blocks, so one eor later finishes the block.
458 eor v4.16b,v6.16b,v7.16b | |
// The flags from this subs are used by the csel below and by the b.hs at the
// bottom of the loop.
459 subs x2,x2,#0x30 | |
460 eor v5.16b,v2.16b,v7.16b | |
461 csel x6,x2,x6,lo // x6, w6, is zero at this point | |
462 aesd v0.16b,v17.16b | |
463 aesimc v0.16b,v0.16b | |
464 aesd v1.16b,v17.16b | |
465 aesimc v1.16b,v1.16b | |
466 aesd v18.16b,v17.16b | |
467 aesimc v18.16b,v18.16b | |
468 eor v17.16b,v3.16b,v7.16b | |
469 add x0,x0,x6 // x0 is adjusted in such way that | |
470 // at exit from the loop v1.16b-v18.16b | |
471 // are loaded with last "words" | |
// The third input block of this group becomes the next chaining value.
472 orr v6.16b,v19.16b,v19.16b | |
473 mov x7,x3 | |
474 aesd v0.16b,v20.16b | |
475 aesimc v0.16b,v0.16b | |
476 aesd v1.16b,v20.16b | |
477 aesimc v1.16b,v1.16b | |
478 aesd v18.16b,v20.16b | |
479 aesimc v18.16b,v18.16b | |
480 ld1 {v2.16b},[x0],#16 | |
481 aesd v0.16b,v21.16b | |
482 aesimc v0.16b,v0.16b | |
483 aesd v1.16b,v21.16b | |
484 aesimc v1.16b,v1.16b | |
485 aesd v18.16b,v21.16b | |
486 aesimc v18.16b,v18.16b | |
487 ld1 {v3.16b},[x0],#16 | |
488 aesd v0.16b,v22.16b | |
489 aesimc v0.16b,v0.16b | |
490 aesd v1.16b,v22.16b | |
491 aesimc v1.16b,v1.16b | |
492 aesd v18.16b,v22.16b | |
493 aesimc v18.16b,v18.16b | |
494 ld1 {v19.16b},[x0],#16 | |
495 aesd v0.16b,v23.16b | |
496 aesd v1.16b,v23.16b | |
497 aesd v18.16b,v23.16b | |
498 ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] | |
499 add w6,w5,#2 | |
500 eor v4.16b,v4.16b,v0.16b | |
501 eor v5.16b,v5.16b,v1.16b | |
502 eor v18.16b,v18.16b,v17.16b | |
503 ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] | |
504 st1 {v4.16b},[x1],#16 | |
505 orr v0.16b,v2.16b,v2.16b | |
506 st1 {v5.16b},[x1],#16 | |
507 orr v1.16b,v3.16b,v3.16b | |
508 st1 {v18.16b},[x1],#16 | |
509 orr v18.16b,v19.16b,v19.16b | |
510 b.hs .Loop3x_cbc_dec | |
511 | |
// x2 == -0x30 means nothing is left; otherwise one or two blocks remain.
512 cmn x2,#0x30 | |
513 b.eq .Lcbc_done | |
514 nop | |
515 | |
// Tail: runs v1 and v18 through the cipher. On entry x7 addresses the third
// round key, v16/v17 hold the first two, and w6 = rounds-6.
516 .Lcbc_dec_tail: | |
517 aesd v1.16b,v16.16b | |
518 aesimc v1.16b,v1.16b | |
519 aesd v18.16b,v16.16b | |
520 aesimc v18.16b,v18.16b | |
521 ld1 {v16.4s},[x7],#16 | |
522 subs w6,w6,#2 | |
523 aesd v1.16b,v17.16b | |
524 aesimc v1.16b,v1.16b | |
525 aesd v18.16b,v17.16b | |
526 aesimc v18.16b,v18.16b | |
527 ld1 {v17.4s},[x7],#16 | |
528 b.gt .Lcbc_dec_tail | |
529 | |
530 aesd v1.16b,v16.16b | |
531 aesimc v1.16b,v1.16b | |
532 aesd v18.16b,v16.16b | |
533 aesimc v18.16b,v18.16b | |
534 aesd v1.16b,v17.16b | |
535 aesimc v1.16b,v1.16b | |
536 aesd v18.16b,v17.16b | |
537 aesimc v18.16b,v18.16b | |
538 aesd v1.16b,v20.16b | |
539 aesimc v1.16b,v1.16b | |
540 aesd v18.16b,v20.16b | |
541 aesimc v18.16b,v18.16b | |
// x2 == -0x20 means exactly one block remains. The flags stay live until the
// b.eq .Lcbc_dec_one below; nothing in between writes them.
542 cmn x2,#0x20 | |
543 aesd v1.16b,v21.16b | |
544 aesimc v1.16b,v1.16b | |
545 aesd v18.16b,v21.16b | |
546 aesimc v18.16b,v18.16b | |
547 eor v5.16b,v6.16b,v7.16b | |
548 aesd v1.16b,v22.16b | |
549 aesimc v1.16b,v1.16b | |
550 aesd v18.16b,v22.16b | |
551 aesimc v18.16b,v18.16b | |
552 eor v17.16b,v3.16b,v7.16b | |
553 aesd v1.16b,v23.16b | |
554 aesd v18.16b,v23.16b | |
555 b.eq .Lcbc_dec_one | |
// Two blocks: v1 chains with v6, v18 chains with v3.
556 eor v5.16b,v5.16b,v1.16b | |
557 eor v17.16b,v17.16b,v18.16b | |
558 orr v6.16b,v19.16b,v19.16b | |
559 st1 {v5.16b},[x1],#16 | |
560 st1 {v17.16b},[x1],#16 | |
561 b .Lcbc_done | |
562 | |
// One block: only v18 is used, chained with v6.
563 .Lcbc_dec_one: | |
564 eor v5.16b,v5.16b,v18.16b | |
565 orr v6.16b,v19.16b,v19.16b | |
566 st1 {v5.16b},[x1],#16 | |
567 | |
// Write the final chaining value back over the caller's IV.
568 .Lcbc_done: | |
569 st1 {v6.16b},[x4] | |
570 .Lcbc_abort: | |
// Only x29 is reloaded: this routine contains no call, so x30 still holds the
// return address.
571 ldr x29,[sp],#16 | |
572 ret | |
573 .size aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt | |
// aes_v8_ctr32_encrypt_blocks: CTR-mode processing of whole blocks.
//   x0 = input pointer
//   x1 = output pointer
//   x2 = number of 16-byte blocks
//   x3 = key schedule pointer (round count read from offset 240)
//   x4 = pointer to the 16-byte counter block. Its last 32-bit word is the
//        counter, byte-swapped before incrementing unless __ARMEB__ is
//        defined. The block at x4 is only read, never written back.
// NOTE(review): there is no early exit for x2 == 0; that value falls into
// .Lctr32_tail, which then stores two blocks - confirm callers never pass 0.
574 .globl aes_v8_ctr32_encrypt_blocks | |
575 .hidden aes_v8_ctr32_encrypt_blocks | |
576 .type aes_v8_ctr32_encrypt_blocks,%function | |
577 .align 5 | |
578 aes_v8_ctr32_encrypt_blocks: | |
579 stp x29,x30,[sp,#-16]! | |
580 add x29,sp,#0 | |
581 ldr w5,[x3,#240] | |
582 | |
// w8 = counter word, v0 = the whole counter block.
583 ldr w8, [x4, #12] | |
584 ld1 {v0.4s},[x4] | |
585 | |
586 ld1 {v16.4s,v17.4s},[x3] // load key schedule... | |
587 sub w5,w5,#4 | |
588 mov x12,#16 | |
// The flags from this cmp feed the csel x12 and the b.ls .Lctr32_tail below;
// none of the instructions in between writes the flags.
589 cmp x2,#2 | |
590 add x7,x3,x5,lsl#4 // pointer to last 5 round keys | |
// w5 = rounds-6 from here on; it is the inner-loop round counter.
591 sub w5,w5,#2 | |
592 ld1 {v20.4s,v21.4s},[x7],#32 | |
593 ld1 {v22.4s,v23.4s},[x7],#32 | |
594 ld1 {v7.4s},[x7] | |
// x7 now addresses the third round key.
595 add x7,x3,#32 | |
596 mov w6,w5 | |
// x12 is the input post-index step used by the tail: 0 when fewer than two
// blocks were requested, so the second tail load re-reads the same block.
597 csel x12,xzr,x12,lo | |
598 #ifndef __ARMEB__ | |
599 rev w8, w8 | |
600 #endif | |
// v1 = counter block with counter+1; v6 keeps an unmodified copy of the
// counter block as a template; w8 is advanced to counter+2.
601 orr v1.16b,v0.16b,v0.16b | |
602 add w10, w8, #1 | |
603 orr v18.16b,v0.16b,v0.16b | |
604 add w8, w8, #2 | |
605 orr v6.16b,v0.16b,v0.16b | |
606 rev w10, w10 | |
607 mov v1.s[3],w10 | |
608 b.ls .Lctr32_tail | |
// Three or more blocks: v18 = counter block with counter+2.
609 rev w12, w8 | |
610 sub x2,x2,#3 // bias | |
611 mov v18.s[3],w12 | |
612 b .Loop3x_ctr32 | |
613 | |
// Main loop: encrypts the three counter blocks v0, v1, v18 in parallel.
614 .align 4 | |
615 .Loop3x_ctr32: | |
616 aese v0.16b,v16.16b | |
617 aesmc v0.16b,v0.16b | |
618 aese v1.16b,v16.16b | |
619 aesmc v1.16b,v1.16b | |
620 aese v18.16b,v16.16b | |
621 aesmc v18.16b,v18.16b | |
622 ld1 {v16.4s},[x7],#16 | |
623 subs w6,w6,#2 | |
624 aese v0.16b,v17.16b | |
625 aesmc v0.16b,v0.16b | |
626 aese v1.16b,v17.16b | |
627 aesmc v1.16b,v1.16b | |
628 aese v18.16b,v17.16b | |
629 aesmc v18.16b,v18.16b | |
630 ld1 {v17.4s},[x7],#16 | |
631 b.gt .Loop3x_ctr32 | |
632 | |
// From here the in-flight states move to v4, v5 and v17 so that v0, v1 and
// v18 can be rebuilt from the template in v6 for the next pass.
633 aese v0.16b,v16.16b | |
634 aesmc v4.16b,v0.16b | |
635 aese v1.16b,v16.16b | |
636 aesmc v5.16b,v1.16b | |
637 ld1 {v2.16b},[x0],#16 | |
638 orr v0.16b,v6.16b,v6.16b | |
639 aese v18.16b,v16.16b | |
640 aesmc v18.16b,v18.16b | |
641 ld1 {v3.16b},[x0],#16 | |
642 orr v1.16b,v6.16b,v6.16b | |
643 aese v4.16b,v17.16b | |
644 aesmc v4.16b,v4.16b | |
645 aese v5.16b,v17.16b | |
646 aesmc v5.16b,v5.16b | |
647 ld1 {v19.16b},[x0],#16 | |
648 mov x7,x3 | |
649 aese v18.16b,v17.16b | |
650 aesmc v17.16b,v18.16b | |
651 orr v18.16b,v6.16b,v6.16b | |
// Next three counter values: w8+1, w8+2, w8+3.
652 add w9,w8,#1 | |
653 aese v4.16b,v20.16b | |
654 aesmc v4.16b,v4.16b | |
655 aese v5.16b,v20.16b | |
656 aesmc v5.16b,v5.16b | |
// The input blocks are XORed with the last round key ahead of time, so one
// eor after the final aese both finishes the cipher and applies the data.
657 eor v2.16b,v2.16b,v7.16b | |
658 add w10,w8,#2 | |
659 aese v17.16b,v20.16b | |
660 aesmc v17.16b,v17.16b | |
661 eor v3.16b,v3.16b,v7.16b | |
662 add w8,w8,#3 | |
663 aese v4.16b,v21.16b | |
664 aesmc v4.16b,v4.16b | |
665 aese v5.16b,v21.16b | |
666 aesmc v5.16b,v5.16b | |
667 eor v19.16b,v19.16b,v7.16b | |
668 rev w9,w9 | |
669 aese v17.16b,v21.16b | |
670 aesmc v17.16b,v17.16b | |
671 mov v0.s[3], w9 | |
672 rev w10,w10 | |
673 aese v4.16b,v22.16b | |
674 aesmc v4.16b,v4.16b | |
675 aese v5.16b,v22.16b | |
676 aesmc v5.16b,v5.16b | |
677 mov v1.s[3], w10 | |
678 rev w12,w8 | |
679 aese v17.16b,v22.16b | |
680 aesmc v17.16b,v17.16b | |
681 mov v18.s[3], w12 | |
// The flags from this subs are consumed by the b.hs at the bottom of the
// loop; nothing in between writes them.
682 subs x2,x2,#3 | |
683 aese v4.16b,v23.16b | |
684 aese v5.16b,v23.16b | |
685 aese v17.16b,v23.16b | |
686 | |
687 eor v2.16b,v2.16b,v4.16b | |
688 ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] | |
689 st1 {v2.16b},[x1],#16 | |
690 eor v3.16b,v3.16b,v5.16b | |
691 mov w6,w5 | |
692 st1 {v3.16b},[x1],#16 | |
693 eor v19.16b,v19.16b,v17.16b | |
694 ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] | |
695 st1 {v19.16b},[x1],#16 | |
696 b.hs .Loop3x_ctr32 | |
697 | |
// Undo the bias: x2 = blocks still to do (0, 1 or 2). With one block left
// the tail's input step is set to 0.
698 adds x2,x2,#3 | |
699 b.eq .Lctr32_done | |
700 cmp x2,#1 | |
701 mov x12,#16 | |
702 csel x12,xzr,x12,eq | |
703 | |
// Tail: always encrypts the two counter blocks v0 and v1; the second result
// is stored only when x2 is not 1.
704 .Lctr32_tail: | |
705 aese v0.16b,v16.16b | |
706 aesmc v0.16b,v0.16b | |
707 aese v1.16b,v16.16b | |
708 aesmc v1.16b,v1.16b | |
709 ld1 {v16.4s},[x7],#16 | |
710 subs w6,w6,#2 | |
711 aese v0.16b,v17.16b | |
712 aesmc v0.16b,v0.16b | |
713 aese v1.16b,v17.16b | |
714 aesmc v1.16b,v1.16b | |
715 ld1 {v17.4s},[x7],#16 | |
716 b.gt .Lctr32_tail | |
717 | |
718 aese v0.16b,v16.16b | |
719 aesmc v0.16b,v0.16b | |
720 aese v1.16b,v16.16b | |
721 aesmc v1.16b,v1.16b | |
722 aese v0.16b,v17.16b | |
723 aesmc v0.16b,v0.16b | |
724 aese v1.16b,v17.16b | |
725 aesmc v1.16b,v1.16b | |
// With x12 == 0 both loads read the same block, so nothing beyond a single
// remaining block is read.
726 ld1 {v2.16b},[x0],x12 | |
727 aese v0.16b,v20.16b | |
728 aesmc v0.16b,v0.16b | |
729 aese v1.16b,v20.16b | |
730 aesmc v1.16b,v1.16b | |
731 ld1 {v3.16b},[x0] | |
732 aese v0.16b,v21.16b | |
733 aesmc v0.16b,v0.16b | |
734 aese v1.16b,v21.16b | |
735 aesmc v1.16b,v1.16b | |
736 eor v2.16b,v2.16b,v7.16b | |
737 aese v0.16b,v22.16b | |
738 aesmc v0.16b,v0.16b | |
739 aese v1.16b,v22.16b | |
740 aesmc v1.16b,v1.16b | |
741 eor v3.16b,v3.16b,v7.16b | |
742 aese v0.16b,v23.16b | |
743 aese v1.16b,v23.16b | |
744 | |
745 cmp x2,#1 | |
746 eor v2.16b,v2.16b,v0.16b | |
747 eor v3.16b,v3.16b,v1.16b | |
748 st1 {v2.16b},[x1],#16 | |
749 b.eq .Lctr32_done | |
750 st1 {v3.16b},[x1] | |
751 | |
752 .Lctr32_done: | |
// Only x29 is reloaded: this routine contains no call, so x30 still holds the
// return address.
753 ldr x29,[sp],#16 | |
754 ret | |
755 .size aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks | |
756 #endif | |
757 #endif | |
OLD | NEW |